1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2021 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#ifndef QT_BOOTSTRAPPED
6#include <qcoreapplication.h>
7#endif
8#include <qdebug.h>
9#include "qjsonparser_p.h"
10#include "qjson_p.h"
11#include "private/qstringconverter_p.h"
12#include "private/qcborvalue_p.h"
13#include "private/qnumeric_p.h"
14#include <private/qtools_p.h>
15
// Uncomment the following line to trace the parser through qDebug().
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
// Depth of the trace output; every level is indented by four spaces.
Q_CONSTINIT static int indent = 0;
// NOTE(review): BEGIN streams `current`, which is not defined anywhere in the
// visible code - confirm before enabling PARSER_DEBUG.
#  define BEGIN qDebug() << QByteArray(4 * indent++, ' ').constData() << "pos=" << current
#  define END --indent
#  define DEBUG qDebug() << QByteArray(4 * indent, ' ').constData()
#else
// Tracing disabled: the streamed expressions still have to compile, but the
// qDebug() branch is never taken.
#  define BEGIN if (true) {} else qDebug()
#  define END do {} while (false)
#  define DEBUG if (true) {} else qDebug()
#endif
27
// Deepest permitted nesting of arrays/objects; parseObject() and parseArray()
// report QJsonParseError::DeepNesting once nestingLevel exceeds this value.
static constexpr int nestingLimit = 1024;
29
30QT_BEGIN_NAMESPACE
31
32using namespace QtMiscUtils;
33
// Untranslated messages for every QJsonParseError::ParseError value. They are
// marked for translation in the "QJsonParseError" context and are looked up at
// run time by QJsonParseError::errorString().
#define JSONERR_OK          QT_TRANSLATE_NOOP("QJsonParseError", "no error occurred")
#define JSONERR_UNTERM_OBJ  QT_TRANSLATE_NOOP("QJsonParseError", "unterminated object")
#define JSONERR_MISS_NSEP   QT_TRANSLATE_NOOP("QJsonParseError", "missing name separator")
#define JSONERR_UNTERM_AR   QT_TRANSLATE_NOOP("QJsonParseError", "unterminated array")
#define JSONERR_MISS_VSEP   QT_TRANSLATE_NOOP("QJsonParseError", "missing value separator")
#define JSONERR_ILLEGAL_VAL QT_TRANSLATE_NOOP("QJsonParseError", "illegal value")
#define JSONERR_END_OF_NUM  QT_TRANSLATE_NOOP("QJsonParseError", "invalid termination by number")
#define JSONERR_ILLEGAL_NUM QT_TRANSLATE_NOOP("QJsonParseError", "illegal number")
#define JSONERR_STR_ESC_SEQ QT_TRANSLATE_NOOP("QJsonParseError", "invalid escape sequence")
#define JSONERR_STR_UTF8    QT_TRANSLATE_NOOP("QJsonParseError", "invalid UTF8 string")
#define JSONERR_UTERM_STR   QT_TRANSLATE_NOOP("QJsonParseError", "unterminated string")
#define JSONERR_MISS_OBJ    QT_TRANSLATE_NOOP("QJsonParseError", "object is missing after a comma")
#define JSONERR_DEEP_NEST   QT_TRANSLATE_NOOP("QJsonParseError", "too deeply nested document")
#define JSONERR_DOC_LARGE   QT_TRANSLATE_NOOP("QJsonParseError", "too large document")
#define JSONERR_GARBAGEEND  QT_TRANSLATE_NOOP("QJsonParseError", "garbage at the end of the document")
50
51/*!
52 \class QJsonParseError
53 \inmodule QtCore
54 \ingroup json
55 \ingroup shared
56 \ingroup qtserialization
57 \reentrant
58 \since 5.0
59
60 \brief The QJsonParseError class is used to report errors during JSON parsing.
61
62 \sa {JSON Support in Qt}, {JSON Save Game Example}
63*/
64
65/*!
66 \enum QJsonParseError::ParseError
67
68 This enum describes the type of error that occurred during the parsing of a JSON document.
69
70 \value NoError No error occurred
71 \value UnterminatedObject An object is not correctly terminated with a closing curly bracket
    \value MissingNameSeparator     A colon separating keys from values inside objects is missing
    \value UnterminatedArray        The array is not correctly terminated with a closing square bracket
    \value MissingValueSeparator    A comma separating different items is missing
75 \value IllegalValue The value is illegal
76 \value TerminationByNumber The input stream ended while parsing a number
77 \value IllegalNumber The number is not well formed
78 \value IllegalEscapeSequence An illegal escape sequence occurred in the input
79 \value IllegalUTF8String An illegal UTF8 sequence occurred in the input
80 \value UnterminatedString A string wasn't terminated with a quote
81 \value MissingObject An object was expected but couldn't be found
82 \value DeepNesting The JSON document is too deeply nested for the parser to parse it
83 \value DocumentTooLarge The JSON document is too large for the parser to parse it
84 \value GarbageAtEnd The parsed document contains additional garbage characters at the end
85
86*/
87
88/*!
89 \variable QJsonParseError::error
90
91 Contains the type of the parse error. Is equal to QJsonParseError::NoError if the document
92 was parsed correctly.
93
94 \sa ParseError, errorString()
95*/
96
97
98/*!
99 \variable QJsonParseError::offset
100
101 Contains the offset in the input string where the parse error occurred.
102
103 \sa error, errorString()
104*/
105
106/*!
107 Returns the human-readable message appropriate to the reported JSON parsing error.
108
109 \sa error
110 */
111QString QJsonParseError::errorString() const
112{
113 const char *sz = "";
114 switch (error) {
115 case NoError:
116 sz = JSONERR_OK;
117 break;
118 case UnterminatedObject:
119 sz = JSONERR_UNTERM_OBJ;
120 break;
121 case MissingNameSeparator:
122 sz = JSONERR_MISS_NSEP;
123 break;
124 case UnterminatedArray:
125 sz = JSONERR_UNTERM_AR;
126 break;
127 case MissingValueSeparator:
128 sz = JSONERR_MISS_VSEP;
129 break;
130 case IllegalValue:
131 sz = JSONERR_ILLEGAL_VAL;
132 break;
133 case TerminationByNumber:
134 sz = JSONERR_END_OF_NUM;
135 break;
136 case IllegalNumber:
137 sz = JSONERR_ILLEGAL_NUM;
138 break;
139 case IllegalEscapeSequence:
140 sz = JSONERR_STR_ESC_SEQ;
141 break;
142 case IllegalUTF8String:
143 sz = JSONERR_STR_UTF8;
144 break;
145 case UnterminatedString:
146 sz = JSONERR_UTERM_STR;
147 break;
148 case MissingObject:
149 sz = JSONERR_MISS_OBJ;
150 break;
151 case DeepNesting:
152 sz = JSONERR_DEEP_NEST;
153 break;
154 case DocumentTooLarge:
155 sz = JSONERR_DOC_LARGE;
156 break;
157 case GarbageAtEnd:
158 sz = JSONERR_GARBAGEEND;
159 break;
160 }
161#ifndef QT_BOOTSTRAPPED
162 return QCoreApplication::translate(context: "QJsonParseError", key: sz);
163#else
164 return QLatin1StringView(sz);
165#endif
166}
167
168using namespace QJsonPrivate;
169
170class StashedContainer
171{
172 Q_DISABLE_COPY_MOVE(StashedContainer)
173public:
174 StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> *container,
175 QCborValue::Type type)
176 : type(type), stashed(std::move(*container)), current(container)
177 {
178 }
179
180 ~StashedContainer()
181 {
182 stashed->append(v: QCborContainerPrivate::makeValue(type, n: -1, d: current->take(),
183 disp: QCborContainerPrivate::MoveContainer));
184 *current = std::move(stashed);
185 }
186
187private:
188 QCborValue::Type type;
189 QExplicitlySharedDataPointer<QCborContainerPrivate> stashed;
190 QExplicitlySharedDataPointer<QCborContainerPrivate> *current;
191};
192
193Parser::Parser(const char *json, int length)
194 : head(json), json(json)
195 , nestingLevel(0)
196 , lastError(QJsonParseError::NoError)
197{
198 end = json + length;
199}
200
201
202
203/*
204
205begin-array = ws %x5B ws ; [ left square bracket
206
207begin-object = ws %x7B ws ; { left curly bracket
208
209end-array = ws %x5D ws ; ] right square bracket
210
211end-object = ws %x7D ws ; } right curly bracket
212
213name-separator = ws %x3A ws ; : colon
214
215value-separator = ws %x2C ws ; , comma
216
217Insignificant whitespace is allowed before or after any of the six
218structural characters.
219
220ws = *(
221 %x20 / ; Space
222 %x09 / ; Horizontal tab
223 %x0A / ; Line feed or New line
224 %x0D ; Carriage return
225 )
226
227*/
228
// Byte values of the whitespace and structural characters from the grammar
// above, plus the quotation mark that starts a string.
enum {
    // insignificant whitespace
    Space          = 0x20, // ' '
    Tab            = 0x09, // '\t'
    LineFeed       = 0x0a, // '\n'
    Return         = 0x0d, // '\r'

    // structural characters
    BeginArray     = 0x5b, // '['
    BeginObject    = 0x7b, // '{'
    EndArray       = 0x5d, // ']'
    EndObject      = 0x7d, // '}'
    NameSeparator  = 0x3a, // ':'
    ValueSeparator = 0x2c, // ','

    // string delimiter
    Quote          = 0x22  // '"'
};
242
243void Parser::eatBOM()
244{
245 // eat UTF-8 byte order mark
246 uchar utf8bom[3] = { 0xef, 0xbb, 0xbf };
247 if (end - json > 3 &&
248 (uchar)json[0] == utf8bom[0] &&
249 (uchar)json[1] == utf8bom[1] &&
250 (uchar)json[2] == utf8bom[2])
251 json += 3;
252}
253
254bool Parser::eatSpace()
255{
256 while (json < end) {
257 if (*json > Space)
258 break;
259 if (*json != Space &&
260 *json != Tab &&
261 *json != LineFeed &&
262 *json != Return)
263 break;
264 ++json;
265 }
266 return (json < end);
267}
268
269char Parser::nextToken()
270{
271 if (!eatSpace())
272 return 0;
273 char token = *json++;
274 switch (token) {
275 case BeginArray:
276 case BeginObject:
277 case NameSeparator:
278 case ValueSeparator:
279 case EndArray:
280 case EndObject:
281 case Quote:
282 break;
283 default:
284 token = 0;
285 break;
286 }
287 return token;
288}
289
290/*
291 JSON-text = object / array
292*/
293QCborValue Parser::parse(QJsonParseError *error)
294{
295#ifdef PARSER_DEBUG
296 indent = 0;
297 qDebug(">>>>> parser begin");
298#endif
299 eatBOM();
300 char token = nextToken();
301
302 QCborValue data;
303
304 DEBUG << Qt::hex << (uint)token;
305 if (token == BeginArray) {
306 container = new QCborContainerPrivate;
307 if (!parseArray())
308 goto error;
309 data = QCborContainerPrivate::makeValue(type: QCborValue::Array, n: -1, d: container.take(),
310 disp: QCborContainerPrivate::MoveContainer);
311 } else if (token == BeginObject) {
312 container = new QCborContainerPrivate;
313 if (!parseObject())
314 goto error;
315 data = QCborContainerPrivate::makeValue(type: QCborValue::Map, n: -1, d: container.take(),
316 disp: QCborContainerPrivate::MoveContainer);
317 } else {
318 lastError = QJsonParseError::IllegalValue;
319 goto error;
320 }
321
322 eatSpace();
323 if (json < end) {
324 lastError = QJsonParseError::GarbageAtEnd;
325 goto error;
326 }
327
328 END;
329 {
330 if (error) {
331 error->offset = 0;
332 error->error = QJsonParseError::NoError;
333 }
334
335 return data;
336 }
337
338error:
339#ifdef PARSER_DEBUG
340 qDebug(">>>>> parser error");
341#endif
342 container.reset();
343 if (error) {
344 error->offset = json - head;
345 error->error = lastError;
346 }
347 return QCborValue();
348}
349
// A hand-written std::unique() variant for ranges in which equal elements are
// adjacent. Of every run of elements that \a compare considers equal, the
// _last_ one is retained. Elements are never moved implicitly: every write
// goes through \a assign(target, source), so that the caller can release
// whatever the overwritten target referred to (containers need a deref).
//
// Returns the new logical end of the range.
template<typename Iterator, typename Compare, typename Assign>
static Iterator customAssigningUniqueLast(Iterator first, Iterator last,
                                          Compare compare, Assign assign)
{
    // Everything before the first pair of equal neighbours is already unique.
    Iterator kept = std::adjacent_find(first, last, compare);
    if (kept == last)
        return last;

    // adjacent_find guarantees that kept has an equal successor.
    Iterator scan = kept;
    ++scan;
    Q_ASSERT(scan != last);
    Q_ASSERT(compare(*kept, *scan));
    assign(*kept, *scan);

    for (++scan; scan != last; ++scan) {
        // A new key starts a new output slot; an equal key overwrites the slot.
        if (!compare(*kept, *scan))
            ++kept;

        // At least one element has been dropped already, so the output slot
        // always trails the scan position and each element has to cross the gap.
        Q_ASSERT(kept != scan);

        // Assign unconditionally: overwriting is what releases the eliminated
        // element, so the number of assignments is deliberately not minimized.
        assign(*kept, *scan);
    }

    ++kept;
    return kept;
}
382
383static void sortContainer(QCborContainerPrivate *container)
384{
385 using Forward = QJsonPrivate::KeyIterator;
386 using Value = Forward::value_type;
387
388 auto compare = [container](const Value &a, const Value &b)
389 {
390 const auto &aKey = a.key();
391 const auto &bKey = b.key();
392
393 Q_ASSERT(aKey.flags & QtCbor::Element::HasByteData);
394 Q_ASSERT(bKey.flags & QtCbor::Element::HasByteData);
395
396 const QtCbor::ByteData *aData = container->byteData(e: aKey);
397 const QtCbor::ByteData *bData = container->byteData(e: bKey);
398
399 if (!aData)
400 return bData ? -1 : 0;
401 if (!bData)
402 return 1;
403
404 // US-ASCII (StringIsAscii flag) is just a special case of UTF-8
405 // string, so we can safely ignore the flag.
406
407 if (aKey.flags & QtCbor::Element::StringIsUtf16) {
408 if (bKey.flags & QtCbor::Element::StringIsUtf16)
409 return QtPrivate::compareStrings(lhs: aData->asStringView(), rhs: bData->asStringView());
410
411 return -QCborContainerPrivate::compareUtf8(b: bData, s: aData->asStringView());
412 } else {
413 if (bKey.flags & QtCbor::Element::StringIsUtf16)
414 return QCborContainerPrivate::compareUtf8(b: aData, s: bData->asStringView());
415
416 return QtPrivate::compareStrings(lhs: aData->asUtf8StringView(), rhs: bData->asUtf8StringView());
417 }
418 };
419
420 // The elements' containers are owned by the outer container, not by the elements themselves.
421 auto move = [](Forward::reference target, Forward::reference source)
422 {
423 QtCbor::Element &targetValue = target.value();
424
425 // If the target has a container, deref it before overwriting, so that we don't leak.
426 if (targetValue.flags & QtCbor::Element::IsContainer)
427 targetValue.container->deref();
428
429 // Do not move, so that we can clear the value afterwards.
430 target = source;
431
432 // Clear the source value, so that we don't store the same container twice.
433 source.value() = QtCbor::Element();
434 };
435
436 std::stable_sort(
437 first: Forward(container->elements.begin()), last: Forward(container->elements.end()),
438 comp: [&compare](const Value &a, const Value &b) { return compare(a, b) < 0; });
439
440 Forward result = customAssigningUniqueLast(
441 first: Forward(container->elements.begin()), last: Forward(container->elements.end()),
442 compare: [&compare](const Value &a, const Value &b) { return compare(a, b) == 0; }, assign: move);
443
444 container->elements.erase(abegin: result.elementsIterator(), aend: container->elements.end());
445}
446
447
448/*
449 object = begin-object [ member *( value-separator member ) ]
450 end-object
451*/
452
453bool Parser::parseObject()
454{
455 if (++nestingLevel > nestingLimit) {
456 lastError = QJsonParseError::DeepNesting;
457 return false;
458 }
459
460 BEGIN << "parseObject" << json;
461
462 char token = nextToken();
463 while (token == Quote) {
464 if (!container)
465 container = new QCborContainerPrivate;
466 if (!parseMember())
467 return false;
468 token = nextToken();
469 if (token != ValueSeparator)
470 break;
471 token = nextToken();
472 if (token == EndObject) {
473 lastError = QJsonParseError::MissingObject;
474 return false;
475 }
476 }
477
478 DEBUG << "end token=" << token;
479 if (token != EndObject) {
480 lastError = QJsonParseError::UnterminatedObject;
481 return false;
482 }
483
484 END;
485
486 --nestingLevel;
487
488 if (container)
489 sortContainer(container: container.data());
490 return true;
491}
492
493/*
494 member = string name-separator value
495*/
496bool Parser::parseMember()
497{
498 BEGIN << "parseMember";
499
500 if (!parseString())
501 return false;
502 char token = nextToken();
503 if (token != NameSeparator) {
504 lastError = QJsonParseError::MissingNameSeparator;
505 return false;
506 }
507 if (!eatSpace()) {
508 lastError = QJsonParseError::UnterminatedObject;
509 return false;
510 }
511 if (!parseValue())
512 return false;
513
514 END;
515 return true;
516}
517
518/*
519 array = begin-array [ value *( value-separator value ) ] end-array
520*/
521bool Parser::parseArray()
522{
523 BEGIN << "parseArray";
524
525 if (++nestingLevel > nestingLimit) {
526 lastError = QJsonParseError::DeepNesting;
527 return false;
528 }
529
530 if (!eatSpace()) {
531 lastError = QJsonParseError::UnterminatedArray;
532 return false;
533 }
534 if (*json == EndArray) {
535 nextToken();
536 } else {
537 while (1) {
538 if (!eatSpace()) {
539 lastError = QJsonParseError::UnterminatedArray;
540 return false;
541 }
542 if (!container)
543 container = new QCborContainerPrivate;
544 if (!parseValue())
545 return false;
546 char token = nextToken();
547 if (token == EndArray)
548 break;
549 else if (token != ValueSeparator) {
550 if (!eatSpace())
551 lastError = QJsonParseError::UnterminatedArray;
552 else
553 lastError = QJsonParseError::MissingValueSeparator;
554 return false;
555 }
556 }
557 }
558
559 DEBUG << "size =" << (container ? container->elements.size() : 0);
560 END;
561
562 --nestingLevel;
563
564 return true;
565}
566
567/*
568value = false / null / true / object / array / number / string
569
570*/
571
572bool Parser::parseValue()
573{
574 BEGIN << "parse Value" << json;
575
576 switch (*json++) {
577 case 'n':
578 if (end - json < 4) {
579 lastError = QJsonParseError::IllegalValue;
580 return false;
581 }
582 if (*json++ == 'u' &&
583 *json++ == 'l' &&
584 *json++ == 'l') {
585 container->append(v: QCborValue(QCborValue::Null));
586 DEBUG << "value: null";
587 END;
588 return true;
589 }
590 lastError = QJsonParseError::IllegalValue;
591 return false;
592 case 't':
593 if (end - json < 4) {
594 lastError = QJsonParseError::IllegalValue;
595 return false;
596 }
597 if (*json++ == 'r' &&
598 *json++ == 'u' &&
599 *json++ == 'e') {
600 container->append(v: QCborValue(true));
601 DEBUG << "value: true";
602 END;
603 return true;
604 }
605 lastError = QJsonParseError::IllegalValue;
606 return false;
607 case 'f':
608 if (end - json < 5) {
609 lastError = QJsonParseError::IllegalValue;
610 return false;
611 }
612 if (*json++ == 'a' &&
613 *json++ == 'l' &&
614 *json++ == 's' &&
615 *json++ == 'e') {
616 container->append(v: QCborValue(false));
617 DEBUG << "value: false";
618 END;
619 return true;
620 }
621 lastError = QJsonParseError::IllegalValue;
622 return false;
623 case Quote: {
624 if (!parseString())
625 return false;
626 DEBUG << "value: string";
627 END;
628 return true;
629 }
630 case BeginArray: {
631 StashedContainer stashedContainer(&container, QCborValue::Array);
632 if (!parseArray())
633 return false;
634 DEBUG << "value: array";
635 END;
636 return true;
637 }
638 case BeginObject: {
639 StashedContainer stashedContainer(&container, QCborValue::Map);
640 if (!parseObject())
641 return false;
642 DEBUG << "value: object";
643 END;
644 return true;
645 }
646 case ValueSeparator:
647 // Essentially missing value, but after a colon, not after a comma
648 // like the other MissingObject errors.
649 lastError = QJsonParseError::IllegalValue;
650 return false;
651 case EndObject:
652 case EndArray:
653 lastError = QJsonParseError::MissingObject;
654 return false;
655 default:
656 --json;
657 if (!parseNumber())
658 return false;
659 DEBUG << "value: number";
660 END;
661 }
662
663 return true;
664}
665
666
667
668
669
670/*
671 number = [ minus ] int [ frac ] [ exp ]
672 decimal-point = %x2E ; .
673 digit1-9 = %x31-39 ; 1-9
674 e = %x65 / %x45 ; e E
675 exp = e [ minus / plus ] 1*DIGIT
676 frac = decimal-point 1*DIGIT
677 int = zero / ( digit1-9 *DIGIT )
678 minus = %x2D ; -
679 plus = %x2B ; +
680 zero = %x30 ; 0
681
682*/
683
684bool Parser::parseNumber()
685{
686 BEGIN << "parseNumber" << json;
687
688 const char *start = json;
689 bool isInt = true;
690
691 // minus
692 if (json < end && *json == '-')
693 ++json;
694
695 // int = zero / ( digit1-9 *DIGIT )
696 if (json < end && *json == '0') {
697 ++json;
698 } else {
699 while (json < end && isAsciiDigit(c: *json))
700 ++json;
701 }
702
703 // frac = decimal-point 1*DIGIT
704 if (json < end && *json == '.') {
705 ++json;
706 while (json < end && isAsciiDigit(c: *json)) {
707 isInt = isInt && *json == '0';
708 ++json;
709 }
710 }
711
712 // exp = e [ minus / plus ] 1*DIGIT
713 if (json < end && (*json == 'e' || *json == 'E')) {
714 isInt = false;
715 ++json;
716 if (json < end && (*json == '-' || *json == '+'))
717 ++json;
718 while (json < end && isAsciiDigit(c: *json))
719 ++json;
720 }
721
722 if (json >= end) {
723 lastError = QJsonParseError::TerminationByNumber;
724 return false;
725 }
726
727 const QByteArray number = QByteArray::fromRawData(data: start, size: json - start);
728 DEBUG << "numberstring" << number;
729
730 if (isInt) {
731 bool ok;
732 qlonglong n = number.toLongLong(ok: &ok);
733 if (ok) {
734 container->append(v: QCborValue(n));
735 END;
736 return true;
737 }
738 }
739
740 bool ok;
741 double d = number.toDouble(ok: &ok);
742
743 if (!ok) {
744 lastError = QJsonParseError::IllegalNumber;
745 return false;
746 }
747
748 qint64 n;
749 if (convertDoubleTo(v: d, value: &n))
750 container->append(v: QCborValue(n));
751 else
752 container->append(v: QCborValue(d));
753
754 END;
755 return true;
756}
757
758/*
759
760 string = quotation-mark *char quotation-mark
761
762 char = unescaped /
763 escape (
764 %x22 / ; " quotation mark U+0022
765 %x5C / ; \ reverse solidus U+005C
766 %x2F / ; / solidus U+002F
767 %x62 / ; b backspace U+0008
768 %x66 / ; f form feed U+000C
769 %x6E / ; n line feed U+000A
770 %x72 / ; r carriage return U+000D
771 %x74 / ; t tab U+0009
772 %x75 4HEXDIG ) ; uXXXX U+XXXX
773
774 escape = %x5C ; \
775
776 quotation-mark = %x22 ; "
777
778 unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
779 */
780static inline bool addHexDigit(char digit, char32_t *result)
781{
782 *result <<= 4;
783 const int h = fromHex(c: digit);
784 if (h != -1) {
785 *result |= h;
786 return true;
787 }
788
789 return false;
790}
791
792static inline bool scanEscapeSequence(const char *&json, const char *end, char32_t *ch)
793{
794 ++json;
795 if (json >= end)
796 return false;
797
798 DEBUG << "scan escape" << (char)*json;
799 uchar escaped = *json++;
800 switch (escaped) {
801 case '"':
802 *ch = '"'; break;
803 case '\\':
804 *ch = '\\'; break;
805 case '/':
806 *ch = '/'; break;
807 case 'b':
808 *ch = 0x8; break;
809 case 'f':
810 *ch = 0xc; break;
811 case 'n':
812 *ch = 0xa; break;
813 case 'r':
814 *ch = 0xd; break;
815 case 't':
816 *ch = 0x9; break;
817 case 'u': {
818 *ch = 0;
819 if (json > end - 4)
820 return false;
821 for (int i = 0; i < 4; ++i) {
822 if (!addHexDigit(digit: *json, result: ch))
823 return false;
824 ++json;
825 }
826 return true;
827 }
828 default:
829 // this is not as strict as one could be, but allows for more Json files
830 // to be parsed correctly.
831 *ch = escaped;
832 return true;
833 }
834 return true;
835}
836
837static inline bool scanUtf8Char(const char *&json, const char *end, char32_t *result)
838{
839 const auto *usrc = reinterpret_cast<const uchar *>(json);
840 const auto *uend = reinterpret_cast<const uchar *>(end);
841 const uchar b = *usrc++;
842 qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst&: result, src&: usrc, end: uend);
843 if (res < 0)
844 return false;
845
846 json = reinterpret_cast<const char *>(usrc);
847 return true;
848}
849
850bool Parser::parseString()
851{
852 const char *start = json;
853
854 // try to parse a utf-8 string without escape sequences, and note whether it's 7bit ASCII.
855
856 BEGIN << "parse string" << json;
857 bool isUtf8 = true;
858 bool isAscii = true;
859 while (json < end) {
860 char32_t ch = 0;
861 if (*json == '"')
862 break;
863 if (*json == '\\') {
864 isAscii = false;
865 // If we find escape sequences, we store UTF-16 as there are some
866 // escape sequences which are hard to represent in UTF-8.
867 // (plain "\\ud800" for example)
868 isUtf8 = false;
869 break;
870 }
871 if (!scanUtf8Char(json, end, result: &ch)) {
872 lastError = QJsonParseError::IllegalUTF8String;
873 return false;
874 }
875 if (ch > 0x7f)
876 isAscii = false;
877 DEBUG << " " << ch << char(ch);
878 }
879 ++json;
880 DEBUG << "end of string";
881 if (json >= end) {
882 lastError = QJsonParseError::UnterminatedString;
883 return false;
884 }
885
886 // no escape sequences, we are done
887 if (isUtf8) {
888 if (isAscii)
889 container->appendAsciiString(str: start, len: json - start - 1);
890 else
891 container->appendUtf8String(str: start, len: json - start - 1);
892 END;
893 return true;
894 }
895
896 DEBUG << "has escape sequences";
897
898 json = start;
899
900 QString ucs4;
901 while (json < end) {
902 char32_t ch = 0;
903 if (*json == '"')
904 break;
905 else if (*json == '\\') {
906 if (!scanEscapeSequence(json, end, ch: &ch)) {
907 lastError = QJsonParseError::IllegalEscapeSequence;
908 return false;
909 }
910 } else {
911 if (!scanUtf8Char(json, end, result: &ch)) {
912 lastError = QJsonParseError::IllegalUTF8String;
913 return false;
914 }
915 }
916 ucs4.append(v: QChar::fromUcs4(c: ch));
917 }
918 ++json;
919
920 if (json >= end) {
921 lastError = QJsonParseError::UnterminatedString;
922 return false;
923 }
924
925 container->appendByteData(data: reinterpret_cast<const char *>(ucs4.constData()), len: ucs4.size() * 2,
926 type: QCborValue::String, extraFlags: QtCbor::Element::StringIsUtf16);
927 END;
928 return true;
929}
930
931QT_END_NAMESPACE
932

// source code of qtbase/src/corelib/serialization/qjsonparser.cpp