1 | //===--- JSON.h - JSON values, parsing and serialization -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===---------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file supports working with JSON data. |
11 | /// |
12 | /// It comprises: |
13 | /// |
14 | /// - classes which hold dynamically-typed parsed JSON structures |
15 | /// These are value types that can be composed, inspected, and modified. |
16 | /// See json::Value, and the related types json::Object and json::Array. |
17 | /// |
18 | /// - functions to parse JSON text into Values, and to serialize Values to text. |
19 | /// See parse(), operator<<, and format_provider. |
20 | /// |
21 | /// - a convention and helpers for mapping between json::Value and user-defined |
22 | /// types. See fromJSON(), ObjectMapper, and the class comment on Value. |
23 | /// |
24 | /// - an output API json::OStream which can emit JSON without materializing |
25 | /// all structures as json::Value. |
26 | /// |
27 | /// Typically, JSON data would be read from an external source, parsed into |
28 | /// a Value, and then converted into some native data structure before doing |
29 | /// real work on it. (And vice versa when writing). |
30 | /// |
31 | /// Other serialization mechanisms you may consider: |
32 | /// |
33 | /// - YAML is also text-based, and more human-readable than JSON. It's a more |
34 | /// complex format and data model, and YAML parsers aren't ubiquitous. |
35 | /// YAMLParser.h is a streaming parser suitable for parsing large documents |
36 | /// (including JSON, as YAML is a superset). It can be awkward to use |
37 | /// directly. YAML I/O (YAMLTraits.h) provides data mapping that is more |
38 | /// declarative than the toJSON/fromJSON conventions here. |
39 | /// |
40 | /// - LLVM bitstream is a space- and CPU- efficient binary format. Typically it |
41 | /// encodes LLVM IR ("bitcode"), but it can be a container for other data. |
42 | /// Low-level reader/writer libraries are in Bitstream/Bitstream*.h |
43 | /// |
44 | //===---------------------------------------------------------------------===// |
45 | |
46 | #ifndef LLVM_SUPPORT_JSON_H |
47 | #define LLVM_SUPPORT_JSON_H |
48 | |
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <cmath>
#include <limits>
#include <map>
#include <memory>
59 | |
60 | namespace llvm { |
61 | namespace json { |
62 | |
63 | // === String encodings === |
64 | // |
65 | // JSON strings are character sequences (not byte sequences like std::string). |
66 | // We need to know the encoding, and for simplicity only support UTF-8. |
67 | // |
68 | // - When parsing, invalid UTF-8 is a syntax error like any other |
69 | // |
70 | // - When creating Values from strings, callers must ensure they are UTF-8. |
71 | // with asserts on, invalid UTF-8 will crash the program |
72 | // with asserts off, we'll substitute the replacement character (U+FFFD) |
73 | // Callers can use json::isUTF8() and json::fixUTF8() for validation. |
74 | // |
75 | // - When retrieving strings from Values (e.g. asString()), the result will |
76 | // always be valid UTF-8. |
77 | |
/// True iff \p T is an unsigned integral type whose size equals that of
/// uint64_t (e.g. uint64_t, unsigned long long, and size_t on LP64 targets).
/// Declared `inline` so the header-defined variable template follows the same
/// convention as the standard library's `_v` traits.
template <typename T>
inline constexpr bool is_uint_64_bit_v =
    std::is_integral_v<T> && std::is_unsigned_v<T> &&
    sizeof(T) == sizeof(uint64_t);
82 | |
83 | /// Returns true if \p S is valid UTF-8, which is required for use as JSON. |
84 | /// If it returns false, \p ErrOffset is set to a byte offset near the first error. |
85 | bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr); |
86 | /// Replaces invalid UTF-8 sequences in \p S with the replacement character |
87 | /// (U+FFFD). The returned string is valid UTF-8. |
88 | /// This is much slower than isUTF8, so test that first. |
89 | std::string fixUTF8(llvm::StringRef S); |
90 | |
91 | class Array; |
92 | class ObjectKey; |
93 | class Value; |
94 | template <typename T> Value toJSON(const std::optional<T> &Opt); |
95 | |
96 | /// An Object is a JSON object, which maps strings to heterogenous JSON values. |
97 | /// It simulates DenseMap<ObjectKey, Value>. ObjectKey is a maybe-owned string. |
98 | class Object { |
99 | using Storage = DenseMap<ObjectKey, Value, llvm::DenseMapInfo<StringRef>>; |
100 | Storage M; |
101 | |
102 | public: |
103 | using key_type = ObjectKey; |
104 | using mapped_type = Value; |
105 | using value_type = Storage::value_type; |
106 | using iterator = Storage::iterator; |
107 | using const_iterator = Storage::const_iterator; |
108 | |
109 | Object() = default; |
110 | // KV is a trivial key-value struct for list-initialization. |
111 | // (using std::pair forces extra copies). |
112 | struct KV; |
113 | explicit Object(std::initializer_list<KV> Properties); |
114 | |
115 | iterator begin() { return M.begin(); } |
116 | const_iterator begin() const { return M.begin(); } |
117 | iterator end() { return M.end(); } |
118 | const_iterator end() const { return M.end(); } |
119 | |
120 | bool empty() const { return M.empty(); } |
121 | size_t size() const { return M.size(); } |
122 | |
123 | void clear() { M.clear(); } |
124 | std::pair<iterator, bool> insert(KV E); |
125 | template <typename... Ts> |
126 | std::pair<iterator, bool> try_emplace(const ObjectKey &K, Ts &&... Args) { |
127 | return M.try_emplace(K, std::forward<Ts>(Args)...); |
128 | } |
129 | template <typename... Ts> |
130 | std::pair<iterator, bool> try_emplace(ObjectKey &&K, Ts &&... Args) { |
131 | return M.try_emplace(std::move(K), std::forward<Ts>(Args)...); |
132 | } |
133 | bool erase(StringRef K); |
134 | void erase(iterator I) { M.erase(I); } |
135 | |
136 | iterator find(StringRef K) { return M.find_as(Val: K); } |
137 | const_iterator find(StringRef K) const { return M.find_as(Val: K); } |
138 | // operator[] acts as if Value was default-constructible as null. |
139 | Value &operator[](const ObjectKey &K); |
140 | Value &operator[](ObjectKey &&K); |
141 | // Look up a property, returning nullptr if it doesn't exist. |
142 | Value *get(StringRef K); |
143 | const Value *get(StringRef K) const; |
144 | // Typed accessors return std::nullopt/nullptr if |
145 | // - the property doesn't exist |
146 | // - or it has the wrong type |
147 | std::optional<std::nullptr_t> getNull(StringRef K) const; |
148 | std::optional<bool> getBoolean(StringRef K) const; |
149 | std::optional<double> getNumber(StringRef K) const; |
150 | std::optional<int64_t> getInteger(StringRef K) const; |
151 | std::optional<llvm::StringRef> getString(StringRef K) const; |
152 | const json::Object *getObject(StringRef K) const; |
153 | json::Object *getObject(StringRef K); |
154 | const json::Array *getArray(StringRef K) const; |
155 | json::Array *getArray(StringRef K); |
156 | }; |
157 | bool operator==(const Object &LHS, const Object &RHS); |
158 | inline bool operator!=(const Object &LHS, const Object &RHS) { |
159 | return !(LHS == RHS); |
160 | } |
161 | |
162 | /// An Array is a JSON array, which contains heterogeneous JSON values. |
163 | /// It simulates std::vector<Value>. |
164 | class Array { |
165 | std::vector<Value> V; |
166 | |
167 | public: |
168 | using value_type = Value; |
169 | using iterator = std::vector<Value>::iterator; |
170 | using const_iterator = std::vector<Value>::const_iterator; |
171 | |
172 | Array() = default; |
173 | explicit Array(std::initializer_list<Value> Elements); |
174 | template <typename Collection> explicit Array(const Collection &C) { |
175 | for (const auto &V : C) |
176 | emplace_back(V); |
177 | } |
178 | |
179 | Value &operator[](size_t I); |
180 | const Value &operator[](size_t I) const; |
181 | Value &front(); |
182 | const Value &front() const; |
183 | Value &back(); |
184 | const Value &back() const; |
185 | Value *data(); |
186 | const Value *data() const; |
187 | |
188 | iterator begin(); |
189 | const_iterator begin() const; |
190 | iterator end(); |
191 | const_iterator end() const; |
192 | |
193 | bool empty() const; |
194 | size_t size() const; |
195 | void reserve(size_t S); |
196 | |
197 | void clear(); |
198 | void push_back(const Value &E); |
199 | void push_back(Value &&E); |
200 | template <typename... Args> void emplace_back(Args &&...A); |
201 | void pop_back(); |
202 | iterator insert(const_iterator P, const Value &E); |
203 | iterator insert(const_iterator P, Value &&E); |
204 | template <typename It> iterator insert(const_iterator P, It A, It Z); |
205 | template <typename... Args> iterator emplace(const_iterator P, Args &&...A); |
206 | |
207 | friend bool operator==(const Array &L, const Array &R); |
208 | }; |
209 | inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } |
210 | |
211 | /// A Value is an JSON value of unknown type. |
212 | /// They can be copied, but should generally be moved. |
213 | /// |
214 | /// === Composing values === |
215 | /// |
216 | /// You can implicitly construct Values from: |
217 | /// - strings: std::string, SmallString, formatv, StringRef, char* |
218 | /// (char*, and StringRef are references, not copies!) |
219 | /// - numbers |
220 | /// - booleans |
221 | /// - null: nullptr |
222 | /// - arrays: {"foo", 42.0, false} |
223 | /// - serializable things: types with toJSON(const T&)->Value, found by ADL |
224 | /// |
225 | /// They can also be constructed from object/array helpers: |
226 | /// - json::Object is a type like map<ObjectKey, Value> |
227 | /// - json::Array is a type like vector<Value> |
228 | /// These can be list-initialized, or used to build up collections in a loop. |
229 | /// json::ary(Collection) converts all items in a collection to Values. |
230 | /// |
231 | /// === Inspecting values === |
232 | /// |
233 | /// Each Value is one of the JSON kinds: |
234 | /// null (nullptr_t) |
235 | /// boolean (bool) |
236 | /// number (double, int64 or uint64) |
237 | /// string (StringRef) |
238 | /// array (json::Array) |
239 | /// object (json::Object) |
240 | /// |
241 | /// The kind can be queried directly, or implicitly via the typed accessors: |
242 | /// if (std::optional<StringRef> S = E.getAsString() |
243 | /// assert(E.kind() == Value::String); |
244 | /// |
245 | /// Array and Object also have typed indexing accessors for easy traversal: |
246 | /// Expected<Value> E = parse(R"( {"options": {"font": "sans-serif"}} )"); |
247 | /// if (Object* O = E->getAsObject()) |
248 | /// if (Object* Opts = O->getObject("options")) |
249 | /// if (std::optional<StringRef> Font = Opts->getString("font")) |
250 | /// assert(Opts->at("font").kind() == Value::String); |
251 | /// |
252 | /// === Converting JSON values to C++ types === |
253 | /// |
254 | /// The convention is to have a deserializer function findable via ADL: |
255 | /// fromJSON(const json::Value&, T&, Path) -> bool |
256 | /// |
257 | /// The return value indicates overall success, and Path is used for precise |
258 | /// error reporting. (The Path::Root passed in at the top level fromJSON call |
259 | /// captures any nested error and can render it in context). |
260 | /// If conversion fails, fromJSON calls Path::report() and immediately returns. |
261 | /// This ensures that the first fatal error survives. |
262 | /// |
263 | /// Deserializers are provided for: |
264 | /// - bool |
265 | /// - int and int64_t |
266 | /// - double |
267 | /// - std::string |
268 | /// - vector<T>, where T is deserializable |
269 | /// - map<string, T>, where T is deserializable |
270 | /// - std::optional<T>, where T is deserializable |
271 | /// ObjectMapper can help writing fromJSON() functions for object types. |
272 | /// |
273 | /// For conversion in the other direction, the serializer function is: |
274 | /// toJSON(const T&) -> json::Value |
275 | /// If this exists, then it also allows constructing Value from T, and can |
276 | /// be used to serialize vector<T>, map<string, T>, and std::optional<T>. |
277 | /// |
278 | /// === Serialization === |
279 | /// |
280 | /// Values can be serialized to JSON: |
281 | /// 1) raw_ostream << Value // Basic formatting. |
282 | /// 2) raw_ostream << formatv("{0}", Value) // Basic formatting. |
283 | /// 3) raw_ostream << formatv("{0:2}", Value) // Pretty-print with indent 2. |
284 | /// |
285 | /// And parsed: |
286 | /// Expected<Value> E = json::parse("[1, 2, null]"); |
287 | /// assert(E && E->kind() == Value::Array); |
288 | class Value { |
289 | public: |
290 | enum Kind { |
291 | Null, |
292 | Boolean, |
293 | /// Number values can store both int64s and doubles at full precision, |
294 | /// depending on what they were constructed/parsed from. |
295 | Number, |
296 | String, |
297 | Array, |
298 | Object, |
299 | }; |
300 | |
301 | // It would be nice to have Value() be null. But that would make {} null too. |
302 | Value(const Value &M) { copyFrom(M); } |
303 | Value(Value &&M) { moveFrom(M: std::move(M)); } |
304 | Value(std::initializer_list<Value> Elements); |
305 | Value(json::Array &&Elements) : Type(T_Array) { |
306 | create<json::Array>(V: std::move(Elements)); |
307 | } |
308 | template <typename Elt> |
309 | Value(const std::vector<Elt> &C) : Value(json::Array(C)) {} |
310 | Value(json::Object &&Properties) : Type(T_Object) { |
311 | create<json::Object>(V: std::move(Properties)); |
312 | } |
313 | template <typename Elt> |
314 | Value(const std::map<std::string, Elt> &C) : Value(json::Object(C)) {} |
315 | // Strings: types with value semantics. Must be valid UTF-8. |
316 | Value(std::string V) : Type(T_String) { |
317 | if (LLVM_UNLIKELY(!isUTF8(V))) { |
318 | assert(false && "Invalid UTF-8 in value used as JSON" ); |
319 | V = fixUTF8(S: std::move(V)); |
320 | } |
321 | create<std::string>(V: std::move(V)); |
322 | } |
323 | Value(const llvm::SmallVectorImpl<char> &V) |
324 | : Value(std::string(V.begin(), V.end())) {} |
325 | Value(const llvm::formatv_object_base &V) : Value(V.str()) {} |
326 | // Strings: types with reference semantics. Must be valid UTF-8. |
327 | Value(StringRef V) : Type(T_StringRef) { |
328 | create<llvm::StringRef>(V); |
329 | if (LLVM_UNLIKELY(!isUTF8(V))) { |
330 | assert(false && "Invalid UTF-8 in value used as JSON" ); |
331 | *this = Value(fixUTF8(S: V)); |
332 | } |
333 | } |
334 | Value(const char *V) : Value(StringRef(V)) {} |
335 | Value(std::nullptr_t) : Type(T_Null) {} |
336 | // Boolean (disallow implicit conversions). |
337 | // (The last template parameter is a dummy to keep templates distinct.) |
338 | template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>, |
339 | bool = false> |
340 | Value(T B) : Type(T_Boolean) { |
341 | create<bool>(B); |
342 | } |
343 | |
344 | // Unsigned 64-bit integers. |
345 | template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>> |
346 | Value(T V) : Type(T_UINT64) { |
347 | create<uint64_t>(V: uint64_t{V}); |
348 | } |
349 | |
350 | // Integers (except boolean and uint64_t). |
351 | // Must be non-narrowing convertible to int64_t. |
352 | template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>, |
353 | typename = std::enable_if_t<!std::is_same_v<T, bool>>, |
354 | typename = std::enable_if_t<!is_uint_64_bit_v<T>>> |
355 | Value(T I) : Type(T_Integer) { |
356 | create<int64_t>(V: int64_t{I}); |
357 | } |
358 | // Floating point. Must be non-narrowing convertible to double. |
359 | template <typename T, |
360 | typename = std::enable_if_t<std::is_floating_point_v<T>>, |
361 | double * = nullptr> |
362 | Value(T D) : Type(T_Double) { |
363 | create<double>(V: double{D}); |
364 | } |
365 | // Serializable types: with a toJSON(const T&)->Value function, found by ADL. |
366 | template <typename T, |
367 | typename = std::enable_if_t< |
368 | std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>, |
369 | Value * = nullptr> |
370 | Value(const T &V) : Value(toJSON(V)) {} |
371 | |
372 | Value &operator=(const Value &M) { |
373 | destroy(); |
374 | copyFrom(M); |
375 | return *this; |
376 | } |
377 | Value &operator=(Value &&M) { |
378 | destroy(); |
379 | moveFrom(M: std::move(M)); |
380 | return *this; |
381 | } |
382 | ~Value() { destroy(); } |
383 | |
384 | Kind kind() const { |
385 | switch (Type) { |
386 | case T_Null: |
387 | return Null; |
388 | case T_Boolean: |
389 | return Boolean; |
390 | case T_Double: |
391 | case T_Integer: |
392 | case T_UINT64: |
393 | return Number; |
394 | case T_String: |
395 | case T_StringRef: |
396 | return String; |
397 | case T_Object: |
398 | return Object; |
399 | case T_Array: |
400 | return Array; |
401 | } |
402 | llvm_unreachable("Unknown kind" ); |
403 | } |
404 | |
405 | // Typed accessors return std::nullopt/nullptr if the Value is not of this |
406 | // type. |
407 | std::optional<std::nullptr_t> getAsNull() const { |
408 | if (LLVM_LIKELY(Type == T_Null)) |
409 | return nullptr; |
410 | return std::nullopt; |
411 | } |
412 | std::optional<bool> getAsBoolean() const { |
413 | if (LLVM_LIKELY(Type == T_Boolean)) |
414 | return as<bool>(); |
415 | return std::nullopt; |
416 | } |
417 | std::optional<double> getAsNumber() const { |
418 | if (LLVM_LIKELY(Type == T_Double)) |
419 | return as<double>(); |
420 | if (LLVM_LIKELY(Type == T_Integer)) |
421 | return as<int64_t>(); |
422 | if (LLVM_LIKELY(Type == T_UINT64)) |
423 | return as<uint64_t>(); |
424 | return std::nullopt; |
425 | } |
426 | // Succeeds if the Value is a Number, and exactly representable as int64_t. |
427 | std::optional<int64_t> getAsInteger() const { |
428 | if (LLVM_LIKELY(Type == T_Integer)) |
429 | return as<int64_t>(); |
430 | if (LLVM_LIKELY(Type == T_UINT64)) { |
431 | uint64_t U = as<uint64_t>(); |
432 | if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) { |
433 | return U; |
434 | } |
435 | } |
436 | if (LLVM_LIKELY(Type == T_Double)) { |
437 | double D = as<double>(); |
438 | if (LLVM_LIKELY(std::modf(D, &D) == 0.0 && |
439 | D >= double(std::numeric_limits<int64_t>::min()) && |
440 | D <= double(std::numeric_limits<int64_t>::max()))) |
441 | return D; |
442 | } |
443 | return std::nullopt; |
444 | } |
445 | std::optional<uint64_t> getAsUINT64() const { |
446 | if (Type == T_UINT64) |
447 | return as<uint64_t>(); |
448 | else if (Type == T_Integer) { |
449 | int64_t N = as<int64_t>(); |
450 | if (N >= 0) |
451 | return as<uint64_t>(); |
452 | } |
453 | return std::nullopt; |
454 | } |
455 | std::optional<llvm::StringRef> getAsString() const { |
456 | if (Type == T_String) |
457 | return llvm::StringRef(as<std::string>()); |
458 | if (LLVM_LIKELY(Type == T_StringRef)) |
459 | return as<llvm::StringRef>(); |
460 | return std::nullopt; |
461 | } |
462 | const json::Object *getAsObject() const { |
463 | return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; |
464 | } |
465 | json::Object *getAsObject() { |
466 | return LLVM_LIKELY(Type == T_Object) ? &as<json::Object>() : nullptr; |
467 | } |
468 | const json::Array *getAsArray() const { |
469 | return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; |
470 | } |
471 | json::Array *getAsArray() { |
472 | return LLVM_LIKELY(Type == T_Array) ? &as<json::Array>() : nullptr; |
473 | } |
474 | |
475 | private: |
476 | void destroy(); |
477 | void copyFrom(const Value &M); |
478 | // We allow moving from *const* Values, by marking all members as mutable! |
479 | // This hack is needed to support initializer-list syntax efficiently. |
480 | // (std::initializer_list<T> is a container of const T). |
481 | void moveFrom(const Value &&M); |
482 | friend class Array; |
483 | friend class Object; |
484 | |
485 | template <typename T, typename... U> void create(U &&... V) { |
486 | #if LLVM_ADDRESS_SANITIZER_BUILD |
487 | // Unpoisoning to prevent overwriting poisoned object (e.g., annotated short |
488 | // string). Objects that have had their memory poisoned may cause an ASan |
489 | // error if their memory is reused without calling their destructor. |
490 | // Unpoisoning the memory prevents this error from occurring. |
491 | // FIXME: This is a temporary solution to prevent buildbots from failing. |
492 | // The more appropriate approach would be to call the object's destructor |
493 | // to unpoison memory. This would prevent any potential memory leaks (long |
494 | // strings). Read for details: |
495 | // https://github.com/llvm/llvm-project/pull/79065#discussion_r1462621761 |
496 | __asan_unpoison_memory_region(&Union, sizeof(T)); |
497 | #endif |
498 | new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...); |
499 | } |
500 | template <typename T> T &as() const { |
501 | // Using this two-step static_cast via void * instead of reinterpret_cast |
502 | // silences a -Wstrict-aliasing false positive from GCC6 and earlier. |
503 | void *Storage = static_cast<void *>(&Union); |
504 | return *static_cast<T *>(Storage); |
505 | } |
506 | |
507 | friend class OStream; |
508 | |
509 | enum ValueType : char16_t { |
510 | T_Null, |
511 | T_Boolean, |
512 | T_Double, |
513 | T_Integer, |
514 | T_UINT64, |
515 | T_StringRef, |
516 | T_String, |
517 | T_Object, |
518 | T_Array, |
519 | }; |
520 | // All members mutable, see moveFrom(). |
521 | mutable ValueType Type; |
522 | mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t, |
523 | llvm::StringRef, std::string, json::Array, |
524 | json::Object> |
525 | Union; |
526 | friend bool operator==(const Value &, const Value &); |
527 | }; |
528 | |
529 | bool operator==(const Value &, const Value &); |
530 | inline bool operator!=(const Value &L, const Value &R) { return !(L == R); } |
531 | |
532 | // Array Methods |
533 | inline Value &Array::operator[](size_t I) { return V[I]; } |
534 | inline const Value &Array::operator[](size_t I) const { return V[I]; } |
535 | inline Value &Array::front() { return V.front(); } |
536 | inline const Value &Array::front() const { return V.front(); } |
537 | inline Value &Array::back() { return V.back(); } |
538 | inline const Value &Array::back() const { return V.back(); } |
539 | inline Value *Array::data() { return V.data(); } |
540 | inline const Value *Array::data() const { return V.data(); } |
541 | |
542 | inline typename Array::iterator Array::begin() { return V.begin(); } |
543 | inline typename Array::const_iterator Array::begin() const { return V.begin(); } |
544 | inline typename Array::iterator Array::end() { return V.end(); } |
545 | inline typename Array::const_iterator Array::end() const { return V.end(); } |
546 | |
547 | inline bool Array::empty() const { return V.empty(); } |
548 | inline size_t Array::size() const { return V.size(); } |
549 | inline void Array::reserve(size_t S) { V.reserve(n: S); } |
550 | |
551 | inline void Array::clear() { V.clear(); } |
552 | inline void Array::push_back(const Value &E) { V.push_back(x: E); } |
553 | inline void Array::push_back(Value &&E) { V.push_back(x: std::move(E)); } |
554 | template <typename... Args> inline void Array::emplace_back(Args &&...A) { |
555 | V.emplace_back(std::forward<Args>(A)...); |
556 | } |
557 | inline void Array::pop_back() { V.pop_back(); } |
558 | inline typename Array::iterator Array::insert(const_iterator P, const Value &E) { |
559 | return V.insert(position: P, x: E); |
560 | } |
561 | inline typename Array::iterator Array::insert(const_iterator P, Value &&E) { |
562 | return V.insert(position: P, x: std::move(E)); |
563 | } |
564 | template <typename It> |
565 | inline typename Array::iterator Array::insert(const_iterator P, It A, It Z) { |
566 | return V.insert(P, A, Z); |
567 | } |
568 | template <typename... Args> |
569 | inline typename Array::iterator Array::emplace(const_iterator P, Args &&...A) { |
570 | return V.emplace(P, std::forward<Args>(A)...); |
571 | } |
572 | inline bool operator==(const Array &L, const Array &R) { return L.V == R.V; } |
573 | |
574 | /// ObjectKey is a used to capture keys in Object. Like Value but: |
575 | /// - only strings are allowed |
576 | /// - it's optimized for the string literal case (Owned == nullptr) |
577 | /// Like Value, strings must be UTF-8. See isUTF8 documentation for details. |
578 | class ObjectKey { |
579 | public: |
580 | ObjectKey(const char *S) : ObjectKey(StringRef(S)) {} |
581 | ObjectKey(std::string S) : Owned(new std::string(std::move(S))) { |
582 | if (LLVM_UNLIKELY(!isUTF8(*Owned))) { |
583 | assert(false && "Invalid UTF-8 in value used as JSON" ); |
584 | *Owned = fixUTF8(S: std::move(*Owned)); |
585 | } |
586 | Data = *Owned; |
587 | } |
588 | ObjectKey(llvm::StringRef S) : Data(S) { |
589 | if (LLVM_UNLIKELY(!isUTF8(Data))) { |
590 | assert(false && "Invalid UTF-8 in value used as JSON" ); |
591 | *this = ObjectKey(fixUTF8(S)); |
592 | } |
593 | } |
594 | ObjectKey(const llvm::SmallVectorImpl<char> &V) |
595 | : ObjectKey(std::string(V.begin(), V.end())) {} |
596 | ObjectKey(const llvm::formatv_object_base &V) : ObjectKey(V.str()) {} |
597 | |
598 | ObjectKey(const ObjectKey &C) { *this = C; } |
599 | ObjectKey(ObjectKey &&C) : ObjectKey(static_cast<const ObjectKey &&>(C)) {} |
600 | ObjectKey &operator=(const ObjectKey &C) { |
601 | if (C.Owned) { |
602 | Owned.reset(p: new std::string(*C.Owned)); |
603 | Data = *Owned; |
604 | } else { |
605 | Data = C.Data; |
606 | } |
607 | return *this; |
608 | } |
609 | ObjectKey &operator=(ObjectKey &&) = default; |
610 | |
611 | operator llvm::StringRef() const { return Data; } |
612 | std::string str() const { return Data.str(); } |
613 | |
614 | private: |
615 | // FIXME: this is unneccesarily large (3 pointers). Pointer + length + owned |
616 | // could be 2 pointers at most. |
617 | std::unique_ptr<std::string> Owned; |
618 | llvm::StringRef Data; |
619 | }; |
620 | |
621 | inline bool operator==(const ObjectKey &L, const ObjectKey &R) { |
622 | return llvm::StringRef(L) == llvm::StringRef(R); |
623 | } |
624 | inline bool operator!=(const ObjectKey &L, const ObjectKey &R) { |
625 | return !(L == R); |
626 | } |
627 | inline bool operator<(const ObjectKey &L, const ObjectKey &R) { |
628 | return StringRef(L) < StringRef(R); |
629 | } |
630 | |
/// Key-value aggregate accepted by Object's initializer-list constructor and
/// by Object::insert(). K is the property name and V the property value.
631 | struct Object::KV { |
632 | ObjectKey K; |
633 | Value V; |
634 | }; |
635 | |
636 | inline Object::Object(std::initializer_list<KV> Properties) { |
637 | for (const auto &P : Properties) { |
638 | auto R = try_emplace(K: P.K, Args: nullptr); |
639 | if (R.second) |
640 | R.first->getSecond().moveFrom(M: std::move(P.V)); |
641 | } |
642 | } |
643 | inline std::pair<Object::iterator, bool> Object::insert(KV E) { |
644 | return try_emplace(K: std::move(E.K), Args: std::move(E.V)); |
645 | } |
646 | inline bool Object::erase(StringRef K) { |
647 | return M.erase(Val: ObjectKey(K)); |
648 | } |
649 | |
650 | /// A "cursor" marking a position within a Value. |
651 | /// The Value is a tree, and this is the path from the root to the current node. |
652 | /// This is used to associate errors with particular subobjects. |
653 | class Path { |
654 | public: |
655 | class Root; |
656 | |
657 | /// Records that the value at the current path is invalid. |
658 | /// Message is e.g. "expected number" and becomes part of the final error. |
659 | /// This overwrites any previously written error message in the root. |
660 | void report(llvm::StringLiteral Message); |
661 | |
662 | /// The root may be treated as a Path. |
663 | Path(Root &R) : Parent(nullptr), Seg(&R) {} |
664 | /// Derives a path for an array element: this[Index] |
665 | Path index(unsigned Index) const { return Path(this, Segment(Index)); } |
666 | /// Derives a path for an object field: this.Field |
667 | Path field(StringRef Field) const { return Path(this, Segment(Field)); } |
668 | |
669 | private: |
670 | /// One element in a JSON path: an object field (.foo) or array index [27]. |
671 | /// Exception: the root Path encodes a pointer to the Path::Root. |
672 | class Segment { |
673 | uintptr_t Pointer; |
674 | unsigned Offset; |
675 | |
676 | public: |
677 | Segment() = default; |
678 | Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {} |
679 | Segment(llvm::StringRef Field) |
680 | : Pointer(reinterpret_cast<uintptr_t>(Field.data())), |
681 | Offset(static_cast<unsigned>(Field.size())) {} |
682 | Segment(unsigned Index) : Pointer(0), Offset(Index) {} |
683 | |
684 | bool isField() const { return Pointer != 0; } |
685 | StringRef field() const { |
686 | return StringRef(reinterpret_cast<const char *>(Pointer), Offset); |
687 | } |
688 | unsigned index() const { return Offset; } |
689 | Root *root() const { return reinterpret_cast<Root *>(Pointer); } |
690 | }; |
691 | |
692 | const Path *Parent; |
693 | Segment Seg; |
694 | |
695 | Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {} |
696 | }; |
697 | |
698 | /// The root is the trivial Path to the root value. |
699 | /// It also stores the latest reported error and the path where it occurred. |
700 | class Path::Root { |
701 | llvm::StringRef Name; |
702 | llvm::StringLiteral ErrorMessage; |
703 | std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed. |
704 | |
705 | friend void Path::report(llvm::StringLiteral Message); |
706 | |
707 | public: |
708 | Root(llvm::StringRef Name = "" ) : Name(Name), ErrorMessage("" ) {} |
709 | // No copy/move allowed as there are incoming pointers. |
710 | Root(Root &&) = delete; |
711 | Root &operator=(Root &&) = delete; |
712 | Root(const Root &) = delete; |
713 | Root &operator=(const Root &) = delete; |
714 | |
715 | /// Returns the last error reported, or else a generic error. |
716 | Error getError() const; |
717 | /// Print the root value with the error shown inline as a comment. |
718 | /// Unrelated parts of the value are elided for brevity, e.g. |
719 | /// { |
720 | /// "id": 42, |
721 | /// "name": /* expected string */ null, |
722 | /// "properties": { ... } |
723 | /// } |
724 | void printErrorContext(const Value &, llvm::raw_ostream &) const; |
725 | }; |
726 | |
727 | // Standard deserializers are provided for primitive types. |
728 | // See comments on Value. |
729 | inline bool fromJSON(const Value &E, std::string &Out, Path P) { |
730 | if (auto S = E.getAsString()) { |
731 | Out = std::string(*S); |
732 | return true; |
733 | } |
734 | P.report(Message: "expected string" ); |
735 | return false; |
736 | } |
737 | inline bool fromJSON(const Value &E, int &Out, Path P) { |
738 | if (auto S = E.getAsInteger()) { |
739 | Out = *S; |
740 | return true; |
741 | } |
742 | P.report(Message: "expected integer" ); |
743 | return false; |
744 | } |
745 | inline bool fromJSON(const Value &E, int64_t &Out, Path P) { |
746 | if (auto S = E.getAsInteger()) { |
747 | Out = *S; |
748 | return true; |
749 | } |
750 | P.report(Message: "expected integer" ); |
751 | return false; |
752 | } |
753 | inline bool fromJSON(const Value &E, double &Out, Path P) { |
754 | if (auto S = E.getAsNumber()) { |
755 | Out = *S; |
756 | return true; |
757 | } |
758 | P.report(Message: "expected number" ); |
759 | return false; |
760 | } |
761 | inline bool fromJSON(const Value &E, bool &Out, Path P) { |
762 | if (auto S = E.getAsBoolean()) { |
763 | Out = *S; |
764 | return true; |
765 | } |
766 | P.report(Message: "expected boolean" ); |
767 | return false; |
768 | } |
769 | inline bool fromJSON(const Value &E, uint64_t &Out, Path P) { |
770 | if (auto S = E.getAsUINT64()) { |
771 | Out = *S; |
772 | return true; |
773 | } |
774 | P.report(Message: "expected uint64_t" ); |
775 | return false; |
776 | } |
777 | inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { |
778 | if (auto S = E.getAsNull()) { |
779 | Out = *S; |
780 | return true; |
781 | } |
782 | P.report(Message: "expected null" ); |
783 | return false; |
784 | } |
785 | template <typename T> |
786 | bool fromJSON(const Value &E, std::optional<T> &Out, Path P) { |
787 | if (E.getAsNull()) { |
788 | Out = std::nullopt; |
789 | return true; |
790 | } |
791 | T Result = {}; |
792 | if (!fromJSON(E, Result, P)) |
793 | return false; |
794 | Out = std::move(Result); |
795 | return true; |
796 | } |
797 | template <typename T> |
798 | bool fromJSON(const Value &E, std::vector<T> &Out, Path P) { |
799 | if (auto *A = E.getAsArray()) { |
800 | Out.clear(); |
801 | Out.resize(A->size()); |
802 | for (size_t I = 0; I < A->size(); ++I) |
803 | if (!fromJSON((*A)[I], Out[I], P.index(Index: I))) |
804 | return false; |
805 | return true; |
806 | } |
807 | P.report(Message: "expected array" ); |
808 | return false; |
809 | } |
810 | template <typename T> |
811 | bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) { |
812 | if (auto *O = E.getAsObject()) { |
813 | Out.clear(); |
814 | for (const auto &KV : *O) |
815 | if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))], |
816 | P.field(Field: KV.first))) |
817 | return false; |
818 | return true; |
819 | } |
820 | P.report(Message: "expected object" ); |
821 | return false; |
822 | } |
823 | |
824 | // Allow serialization of std::optional<T> for supported T. |
825 | template <typename T> Value toJSON(const std::optional<T> &Opt) { |
826 | return Opt ? Value(*Opt) : Value(nullptr); |
827 | } |
828 | |
829 | /// Helper for mapping JSON objects onto protocol structs. |
830 | /// |
831 | /// Example: |
832 | /// \code |
833 | /// bool fromJSON(const Value &E, MyStruct &R, Path P) { |
834 | /// ObjectMapper O(E, P); |
835 | /// // When returning false, error details were already reported. |
836 | /// return O && O.map("mandatory_field", R.MandatoryField) && |
837 | /// O.mapOptional("optional_field", R.OptionalField); |
838 | /// } |
839 | /// \endcode |
840 | class ObjectMapper { |
841 | public: |
842 | /// If O is not an object, this mapper is invalid and an error is reported. |
843 | ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) { |
844 | if (!O) |
845 | P.report(Message: "expected object" ); |
846 | } |
847 | |
848 | /// True if the expression is an object. |
849 | /// Must be checked before calling map(). |
850 | operator bool() const { return O; } |
851 | |
852 | /// Maps a property to a field. |
853 | /// If the property is missing or invalid, reports an error. |
854 | template <typename T> bool map(StringLiteral Prop, T &Out) { |
855 | assert(*this && "Must check this is an object before calling map()" ); |
856 | if (const Value *E = O->get(K: Prop)) |
857 | return fromJSON(*E, Out, P.field(Field: Prop)); |
858 | P.field(Field: Prop).report(Message: "missing value" ); |
859 | return false; |
860 | } |
861 | |
862 | /// Maps a property to a field, if it exists. |
863 | /// If the property exists and is invalid, reports an error. |
864 | /// (Optional requires special handling, because missing keys are OK). |
865 | template <typename T> bool map(StringLiteral Prop, std::optional<T> &Out) { |
866 | assert(*this && "Must check this is an object before calling map()" ); |
867 | if (const Value *E = O->get(K: Prop)) |
868 | return fromJSON(*E, Out, P.field(Field: Prop)); |
869 | Out = std::nullopt; |
870 | return true; |
871 | } |
872 | |
873 | /// Maps a property to a field, if it exists. |
874 | /// If the property exists and is invalid, reports an error. |
875 | /// If the property does not exist, Out is unchanged. |
876 | template <typename T> bool mapOptional(StringLiteral Prop, T &Out) { |
877 | assert(*this && "Must check this is an object before calling map()" ); |
878 | if (const Value *E = O->get(K: Prop)) |
879 | return fromJSON(*E, Out, P.field(Field: Prop)); |
880 | return true; |
881 | } |
882 | |
883 | private: |
884 | const Object *O; |
885 | Path P; |
886 | }; |
887 | |
/// Parses the provided JSON source, or returns a ParseError.
/// The returned Value is self-contained and owns its strings (they do not refer
/// to the original source).
///
/// \param JSON the JSON text to parse.
/// \returns the parsed Value on success, or an error on failure.
llvm::Expected<Value> parse(llvm::StringRef JSON);
892 | |
893 | class ParseError : public llvm::ErrorInfo<ParseError> { |
894 | const char *Msg; |
895 | unsigned Line, Column, Offset; |
896 | |
897 | public: |
898 | static char ID; |
899 | ParseError(const char *Msg, unsigned Line, unsigned Column, unsigned Offset) |
900 | : Msg(Msg), Line(Line), Column(Column), Offset(Offset) {} |
901 | void log(llvm::raw_ostream &OS) const override { |
902 | OS << llvm::formatv(Fmt: "[{0}:{1}, byte={2}]: {3}" , Vals: Line, Vals: Column, Vals: Offset, Vals: Msg); |
903 | } |
904 | std::error_code convertToErrorCode() const override { |
905 | return llvm::inconvertibleErrorCode(); |
906 | } |
907 | }; |
908 | |
909 | /// Version of parse() that converts the parsed value to the type T. |
910 | /// RootName describes the root object and is used in error messages. |
911 | template <typename T> |
912 | Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "" ) { |
913 | auto V = parse(JSON); |
914 | if (!V) |
915 | return V.takeError(); |
916 | Path::Root R(RootName); |
917 | T Result; |
918 | if (fromJSON(*V, Result, R)) |
919 | return std::move(Result); |
920 | return R.getError(); |
921 | } |
922 | |
923 | /// json::OStream allows writing well-formed JSON without materializing |
924 | /// all structures as json::Value ahead of time. |
925 | /// It's faster, lower-level, and less safe than OS << json::Value. |
926 | /// It also allows emitting more constructs, such as comments. |
927 | /// |
928 | /// Only one "top-level" object can be written to a stream. |
929 | /// Simplest usage involves passing lambdas (Blocks) to fill in containers: |
930 | /// |
931 | /// json::OStream J(OS); |
932 | /// J.array([&]{ |
933 | /// for (const Event &E : Events) |
934 | /// J.object([&] { |
935 | /// J.attribute("timestamp", int64_t(E.Time)); |
936 | /// J.attributeArray("participants", [&] { |
937 | /// for (const Participant &P : E.Participants) |
938 | /// J.value(P.toString()); |
939 | /// }); |
940 | /// }); |
941 | /// }); |
942 | /// |
943 | /// This would produce JSON like: |
944 | /// |
945 | /// [ |
946 | /// { |
947 | /// "timestamp": 19287398741, |
948 | /// "participants": [ |
949 | /// "King Kong", |
950 | /// "Miley Cyrus", |
951 | /// "Cleopatra" |
952 | /// ] |
953 | /// }, |
954 | /// ... |
955 | /// ] |
956 | /// |
957 | /// The lower level begin/end methods (arrayBegin()) are more flexible but |
958 | /// care must be taken to pair them correctly: |
959 | /// |
///   json::OStream J(OS);
///   J.arrayBegin();
///   for (const Event &E : Events) {
///     J.objectBegin();
///     J.attribute("timestamp", int64_t(E.Time));
///     J.attributeBegin("participants");
///     J.arrayBegin();
///     for (const Participant &P : E.Participants)
///       J.value(P.toString());
///     J.arrayEnd();
///     J.attributeEnd();
///     J.objectEnd();
///   }
///   J.arrayEnd();
972 | /// |
973 | /// If the call sequence isn't valid JSON, asserts will fire in debug mode. |
974 | /// This can be mismatched begin()/end() pairs, trying to emit attributes inside |
975 | /// an array, and so on. |
976 | /// With asserts disabled, this is undefined behavior. |
977 | class OStream { |
978 | public: |
979 | using Block = llvm::function_ref<void()>; |
980 | // If IndentSize is nonzero, output is pretty-printed. |
981 | explicit OStream(llvm::raw_ostream &OS, unsigned IndentSize = 0) |
982 | : OS(OS), IndentSize(IndentSize) { |
983 | Stack.emplace_back(); |
984 | } |
985 | ~OStream() { |
986 | assert(Stack.size() == 1 && "Unmatched begin()/end()" ); |
987 | assert(Stack.back().Ctx == Singleton); |
988 | assert(Stack.back().HasValue && "Did not write top-level value" ); |
989 | } |
990 | |
991 | /// Flushes the underlying ostream. OStream does not buffer internally. |
992 | void flush() { OS.flush(); } |
993 | |
994 | // High level functions to output a value. |
995 | // Valid at top-level (exactly once), in an attribute value (exactly once), |
996 | // or in an array (any number of times). |
997 | |
998 | /// Emit a self-contained value (number, string, vector<string> etc). |
999 | void value(const Value &V); |
1000 | /// Emit an array whose elements are emitted in the provided Block. |
1001 | void array(Block Contents) { |
1002 | arrayBegin(); |
1003 | Contents(); |
1004 | arrayEnd(); |
1005 | } |
1006 | /// Emit an object whose elements are emitted in the provided Block. |
1007 | void object(Block Contents) { |
1008 | objectBegin(); |
1009 | Contents(); |
1010 | objectEnd(); |
1011 | } |
1012 | /// Emit an externally-serialized value. |
1013 | /// The caller must write exactly one valid JSON value to the provided stream. |
1014 | /// No validation or formatting of this value occurs. |
1015 | void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) { |
1016 | rawValueBegin(); |
1017 | Contents(OS); |
1018 | rawValueEnd(); |
1019 | } |
1020 | void rawValue(llvm::StringRef Contents) { |
1021 | rawValue(Contents: [&](raw_ostream &OS) { OS << Contents; }); |
1022 | } |
1023 | /// Emit a JavaScript comment associated with the next printed value. |
1024 | /// The string must be valid until the next attribute or value is emitted. |
1025 | /// Comments are not part of standard JSON, and many parsers reject them! |
1026 | void (llvm::StringRef); |
1027 | |
1028 | // High level functions to output object attributes. |
1029 | // Valid only within an object (any number of times). |
1030 | |
1031 | /// Emit an attribute whose value is self-contained (number, vector<int> etc). |
1032 | void attribute(llvm::StringRef Key, const Value& Contents) { |
1033 | attributeImpl(Key, Contents: [&] { value(V: Contents); }); |
1034 | } |
1035 | /// Emit an attribute whose value is an array with elements from the Block. |
1036 | void attributeArray(llvm::StringRef Key, Block Contents) { |
1037 | attributeImpl(Key, Contents: [&] { array(Contents); }); |
1038 | } |
1039 | /// Emit an attribute whose value is an object with attributes from the Block. |
1040 | void attributeObject(llvm::StringRef Key, Block Contents) { |
1041 | attributeImpl(Key, Contents: [&] { object(Contents); }); |
1042 | } |
1043 | |
1044 | // Low-level begin/end functions to output arrays, objects, and attributes. |
1045 | // Must be correctly paired. Allowed contexts are as above. |
1046 | |
1047 | void arrayBegin(); |
1048 | void arrayEnd(); |
1049 | void objectBegin(); |
1050 | void objectEnd(); |
1051 | void attributeBegin(llvm::StringRef Key); |
1052 | void attributeEnd(); |
1053 | raw_ostream &rawValueBegin(); |
1054 | void rawValueEnd(); |
1055 | |
1056 | private: |
1057 | void attributeImpl(llvm::StringRef Key, Block Contents) { |
1058 | attributeBegin(Key); |
1059 | Contents(); |
1060 | attributeEnd(); |
1061 | } |
1062 | |
1063 | void valueBegin(); |
1064 | void (); |
1065 | void newline(); |
1066 | |
1067 | enum Context { |
1068 | Singleton, // Top level, or object attribute. |
1069 | Array, |
1070 | Object, |
1071 | RawValue, // External code writing a value to OS directly. |
1072 | }; |
1073 | struct State { |
1074 | Context Ctx = Singleton; |
1075 | bool HasValue = false; |
1076 | }; |
1077 | llvm::SmallVector<State, 16> Stack; // Never empty. |
1078 | llvm::StringRef ; |
1079 | llvm::raw_ostream &OS; |
1080 | unsigned IndentSize; |
1081 | unsigned Indent = 0; |
1082 | }; |
1083 | |
1084 | /// Serializes this Value to JSON, writing it to the provided stream. |
1085 | /// The formatting is compact (no extra whitespace) and deterministic. |
1086 | /// For pretty-printing, use the formatv() format_provider below. |
1087 | inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Value &V) { |
1088 | OStream(OS).value(V); |
1089 | return OS; |
1090 | } |
1091 | } // namespace json |
1092 | |
/// Allow printing json::Value with formatv().
/// The default style is basic/compact formatting, like operator<<.
/// A format string like formatv("{0:2}", Value) pretty-prints with indent 2.
template <> struct format_provider<llvm::json::Value> {
  /// Writes the given Value to the stream.
  /// NOTE(review): the StringRef argument is presumably the style portion of
  /// the replacement field (e.g. "2" for indent 2) — confirm against the
  /// out-of-line definition.
  static void format(const llvm::json::Value &, raw_ostream &, StringRef);
};
1099 | } // namespace llvm |
1100 | |
1101 | #endif |
1102 | |