1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2016 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#ifndef QT_BOOTSTRAPPED
42#include <qcoreapplication.h>
43#endif
44#include <qdebug.h>
45#include "qjsonparser_p.h"
46#include "qjson_p.h"
47#include "private/qutfcodec_p.h"
48#include "private/qcborvalue_p.h"
49#include "private/qnumeric_p.h"
50
// Parser tracing. Uncomment the following line to route BEGIN/DEBUG output to
// qDebug(), indented by the current trace depth.
//#define PARSER_DEBUG
#if defined(PARSER_DEBUG)
// Current trace depth; BEGIN increments it, END decrements it.
static int indent = 0;
// NOTE(review): BEGIN streams `current`, which is not declared anywhere in this
// file - confirm it exists before enabling PARSER_DEBUG.
#  define BEGIN qDebug() << QByteArray(4*indent++, ' ').constData() << "pos=" << current
#  define END --indent
#  define DEBUG qDebug() << QByteArray(4*indent, ' ').constData()
#else
// Tracing disabled: the stream expressions sit in a never-taken else branch so
// they still have to compile, and END is an empty statement.
#  define BEGIN if (1) ; else qDebug()
#  define END do {} while (0)
#  define DEBUG if (1) ; else qDebug()
#endif

// Deepest object/array nesting accepted; parseObject() and parseArray() fail
// with QJsonParseError::DeepNesting once nestingLevel exceeds this value.
static const int nestingLimit = 1024;
64
65QT_BEGIN_NAMESPACE
66
// Untranslated message for each QJsonParseError::ParseError value. They are
// marked for extraction in the "QJsonParseError" context and selected by
// QJsonParseError::errorString().
#define JSONERR_OK          QT_TRANSLATE_NOOP("QJsonParseError", "no error occurred")
#define JSONERR_UNTERM_OBJ  QT_TRANSLATE_NOOP("QJsonParseError", "unterminated object")
#define JSONERR_MISS_NSEP   QT_TRANSLATE_NOOP("QJsonParseError", "missing name separator")
#define JSONERR_UNTERM_AR   QT_TRANSLATE_NOOP("QJsonParseError", "unterminated array")
#define JSONERR_MISS_VSEP   QT_TRANSLATE_NOOP("QJsonParseError", "missing value separator")
#define JSONERR_ILLEGAL_VAL QT_TRANSLATE_NOOP("QJsonParseError", "illegal value")
#define JSONERR_END_OF_NUM  QT_TRANSLATE_NOOP("QJsonParseError", "invalid termination by number")
#define JSONERR_ILLEGAL_NUM QT_TRANSLATE_NOOP("QJsonParseError", "illegal number")
#define JSONERR_STR_ESC_SEQ QT_TRANSLATE_NOOP("QJsonParseError", "invalid escape sequence")
#define JSONERR_STR_UTF8    QT_TRANSLATE_NOOP("QJsonParseError", "invalid UTF8 string")
#define JSONERR_UTERM_STR   QT_TRANSLATE_NOOP("QJsonParseError", "unterminated string")
#define JSONERR_MISS_OBJ    QT_TRANSLATE_NOOP("QJsonParseError", "object is missing after a comma")
#define JSONERR_DEEP_NEST   QT_TRANSLATE_NOOP("QJsonParseError", "too deeply nested document")
#define JSONERR_DOC_LARGE   QT_TRANSLATE_NOOP("QJsonParseError", "too large document")
#define JSONERR_GARBAGEEND  QT_TRANSLATE_NOOP("QJsonParseError", "garbage at the end of the document")
83
84/*!
85 \class QJsonParseError
86 \inmodule QtCore
87 \ingroup json
88 \ingroup shared
89 \reentrant
90 \since 5.0
91
92 \brief The QJsonParseError class is used to report errors during JSON parsing.
93
94 \sa {JSON Support in Qt}, {JSON Save Game Example}
95*/
96
97/*!
98 \enum QJsonParseError::ParseError
99
100 This enum describes the type of error that occurred during the parsing of a JSON document.
101
102 \value NoError No error occurred
103 \value UnterminatedObject An object is not correctly terminated with a closing curly bracket
    \value MissingNameSeparator     A colon separating keys from values inside objects is missing
105 \value UnterminatedArray The array is not correctly terminated with a closing square bracket
    \value MissingValueSeparator    A comma separating different items is missing
107 \value IllegalValue The value is illegal
108 \value TerminationByNumber The input stream ended while parsing a number
109 \value IllegalNumber The number is not well formed
110 \value IllegalEscapeSequence An illegal escape sequence occurred in the input
111 \value IllegalUTF8String An illegal UTF8 sequence occurred in the input
112 \value UnterminatedString A string wasn't terminated with a quote
113 \value MissingObject An object was expected but couldn't be found
114 \value DeepNesting The JSON document is too deeply nested for the parser to parse it
115 \value DocumentTooLarge The JSON document is too large for the parser to parse it
116 \value GarbageAtEnd The parsed document contains additional garbage characters at the end
117
118*/
119
120/*!
121 \variable QJsonParseError::error
122
123 Contains the type of the parse error. Is equal to QJsonParseError::NoError if the document
124 was parsed correctly.
125
126 \sa ParseError, errorString()
127*/
128
129
130/*!
131 \variable QJsonParseError::offset
132
133 Contains the offset in the input string where the parse error occurred.
134
135 \sa error, errorString()
136*/
137
138/*!
139 Returns the human-readable message appropriate to the reported JSON parsing error.
140
141 \sa error
142 */
143QString QJsonParseError::errorString() const
144{
145 const char *sz = "";
146 switch (error) {
147 case NoError:
148 sz = JSONERR_OK;
149 break;
150 case UnterminatedObject:
151 sz = JSONERR_UNTERM_OBJ;
152 break;
153 case MissingNameSeparator:
154 sz = JSONERR_MISS_NSEP;
155 break;
156 case UnterminatedArray:
157 sz = JSONERR_UNTERM_AR;
158 break;
159 case MissingValueSeparator:
160 sz = JSONERR_MISS_VSEP;
161 break;
162 case IllegalValue:
163 sz = JSONERR_ILLEGAL_VAL;
164 break;
165 case TerminationByNumber:
166 sz = JSONERR_END_OF_NUM;
167 break;
168 case IllegalNumber:
169 sz = JSONERR_ILLEGAL_NUM;
170 break;
171 case IllegalEscapeSequence:
172 sz = JSONERR_STR_ESC_SEQ;
173 break;
174 case IllegalUTF8String:
175 sz = JSONERR_STR_UTF8;
176 break;
177 case UnterminatedString:
178 sz = JSONERR_UTERM_STR;
179 break;
180 case MissingObject:
181 sz = JSONERR_MISS_OBJ;
182 break;
183 case DeepNesting:
184 sz = JSONERR_DEEP_NEST;
185 break;
186 case DocumentTooLarge:
187 sz = JSONERR_DOC_LARGE;
188 break;
189 case GarbageAtEnd:
190 sz = JSONERR_GARBAGEEND;
191 break;
192 }
193#ifndef QT_BOOTSTRAPPED
194 return QCoreApplication::translate(context: "QJsonParseError", key: sz);
195#else
196 return QLatin1String(sz);
197#endif
198}
199
200using namespace QJsonPrivate;
201
202class StashedContainer
203{
204 Q_DISABLE_COPY_MOVE(StashedContainer)
205public:
206 StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> *container,
207 QCborValue::Type type)
208 : type(type), stashed(std::move(*container)), current(container)
209 {
210 }
211
212 ~StashedContainer()
213 {
214 stashed->append(v: QCborContainerPrivate::makeValue(type, n: -1, d: current->take(),
215 disp: QCborContainerPrivate::MoveContainer));
216 *current = std::move(stashed);
217 }
218
219private:
220 QCborValue::Type type;
221 QExplicitlySharedDataPointer<QCborContainerPrivate> stashed;
222 QExplicitlySharedDataPointer<QCborContainerPrivate> *current;
223};
224
225Parser::Parser(const char *json, int length)
226 : head(json), json(json)
227 , nestingLevel(0)
228 , lastError(QJsonParseError::NoError)
229{
230 end = json + length;
231}
232
233
234
235/*
236
237begin-array = ws %x5B ws ; [ left square bracket
238
239begin-object = ws %x7B ws ; { left curly bracket
240
241end-array = ws %x5D ws ; ] right square bracket
242
243end-object = ws %x7D ws ; } right curly bracket
244
245name-separator = ws %x3A ws ; : colon
246
247value-separator = ws %x2C ws ; , comma
248
249Insignificant whitespace is allowed before or after any of the six
250structural characters.
251
252ws = *(
253 %x20 / ; Space
254 %x09 / ; Horizontal tab
255 %x0A / ; Line feed or New line
256 %x0D ; Carriage return
257 )
258
259*/
260
// Whitespace and structural characters from the grammar above, spelled as
// character literals (values in ASCII noted alongside).
enum {
    // insignificant whitespace
    Space          = ' ',   // 0x20
    Tab            = '\t',  // 0x09
    LineFeed       = '\n',  // 0x0a
    Return         = '\r',  // 0x0d
    // structural characters
    BeginArray     = '[',   // 0x5b
    BeginObject    = '{',   // 0x7b
    EndArray       = ']',   // 0x5d
    EndObject      = '}',   // 0x7d
    NameSeparator  = ':',   // 0x3a
    ValueSeparator = ',',   // 0x2c
    // string delimiter
    Quote          = '"'    // 0x22
};
274
275void Parser::eatBOM()
276{
277 // eat UTF-8 byte order mark
278 uchar utf8bom[3] = { 0xef, 0xbb, 0xbf };
279 if (end - json > 3 &&
280 (uchar)json[0] == utf8bom[0] &&
281 (uchar)json[1] == utf8bom[1] &&
282 (uchar)json[2] == utf8bom[2])
283 json += 3;
284}
285
286bool Parser::eatSpace()
287{
288 while (json < end) {
289 if (*json > Space)
290 break;
291 if (*json != Space &&
292 *json != Tab &&
293 *json != LineFeed &&
294 *json != Return)
295 break;
296 ++json;
297 }
298 return (json < end);
299}
300
301char Parser::nextToken()
302{
303 if (!eatSpace())
304 return 0;
305 char token = *json++;
306 switch (token) {
307 case BeginArray:
308 case BeginObject:
309 case NameSeparator:
310 case ValueSeparator:
311 case EndArray:
312 case EndObject:
313 case Quote:
314 break;
315 default:
316 token = 0;
317 break;
318 }
319 return token;
320}
321
322/*
323 JSON-text = object / array
324*/
325QCborValue Parser::parse(QJsonParseError *error)
326{
327#ifdef PARSER_DEBUG
328 indent = 0;
329 qDebug(">>>>> parser begin");
330#endif
331 eatBOM();
332 char token = nextToken();
333
334 QCborValue data;
335
336 DEBUG << Qt::hex << (uint)token;
337 if (token == BeginArray) {
338 container = new QCborContainerPrivate;
339 if (!parseArray())
340 goto error;
341 data = QCborContainerPrivate::makeValue(type: QCborValue::Array, n: -1, d: container.take(),
342 disp: QCborContainerPrivate::MoveContainer);
343 } else if (token == BeginObject) {
344 container = new QCborContainerPrivate;
345 if (!parseObject())
346 goto error;
347 data = QCborContainerPrivate::makeValue(type: QCborValue::Map, n: -1, d: container.take(),
348 disp: QCborContainerPrivate::MoveContainer);
349 } else {
350 lastError = QJsonParseError::IllegalValue;
351 goto error;
352 }
353
354 eatSpace();
355 if (json < end) {
356 lastError = QJsonParseError::GarbageAtEnd;
357 goto error;
358 }
359
360 END;
361 {
362 if (error) {
363 error->offset = 0;
364 error->error = QJsonParseError::NoError;
365 }
366
367 return data;
368 }
369
370error:
371#ifdef PARSER_DEBUG
372 qDebug(">>>>> parser error");
373#endif
374 container.reset();
375 if (error) {
376 error->offset = json - head;
377 error->error = lastError;
378 }
379 return QCborValue();
380}
381
382// We need to retain the _last_ value for any duplicate keys and we need to deref containers.
383// Therefore the manual implementation of std::unique().
// Variant of std::unique() for a range in which equal elements are adjacent:
// of every run of elements that compare equal, the LAST one survives.
// Elements are relocated by calling assign(target, source) rather than by
// plain assignment. Returns the new logical end of the range.
template<typename Iterator, typename Compare, typename Assign>
static Iterator customAssigningUniqueLast(Iterator first, Iterator last,
                                          Compare compare, Assign assign)
{
    // Nothing to do unless at least one pair of neighbours compares equal.
    Iterator keep = std::adjacent_find(first, last, compare);
    if (keep == last)
        return last;

    // keep is the slot currently being filled, scan the element examined.
    Iterator scan = keep;
    for (++scan; scan != last; ++scan) {
        // A new key opens the next slot; the same key overwrites the current one.
        if (!compare(*keep, *scan))
            ++keep;
        if (keep != scan)
            assign(*keep, *scan);
    }

    ++keep;
    return keep;
}
402
403static void sortContainer(QCborContainerPrivate *container)
404{
405 using Forward = QJsonPrivate::KeyIterator;
406 using Value = Forward::value_type;
407
408 auto compare = [container](const Value &a, const Value &b)
409 {
410 const auto &aKey = a.key();
411 const auto &bKey = b.key();
412
413 Q_ASSERT(aKey.flags & QtCbor::Element::HasByteData);
414 Q_ASSERT(bKey.flags & QtCbor::Element::HasByteData);
415
416 const QtCbor::ByteData *aData = container->byteData(e: aKey);
417 const QtCbor::ByteData *bData = container->byteData(e: bKey);
418
419 if (!aData)
420 return bData ? -1 : 0;
421 if (!bData)
422 return 1;
423
424 // US-ASCII (StringIsAscii flag) is just a special case of UTF-8
425 // string, so we can safely ignore the flag.
426
427 if (aKey.flags & QtCbor::Element::StringIsUtf16) {
428 if (bKey.flags & QtCbor::Element::StringIsUtf16)
429 return QtPrivate::compareStrings(lhs: aData->asStringView(), rhs: bData->asStringView());
430
431 return -QCborContainerPrivate::compareUtf8(b: bData, s: aData->asStringView());
432 } else {
433 if (bKey.flags & QtCbor::Element::StringIsUtf16)
434 return QCborContainerPrivate::compareUtf8(b: aData, s: bData->asStringView());
435
436 // We're missing an explicit UTF-8 to UTF-8 comparison in Qt, but
437 // UTF-8 to UTF-8 comparison retains simple byte ordering, so we'll
438 // abuse the Latin-1 comparison function.
439 return QtPrivate::compareStrings(lhs: aData->asLatin1(), rhs: bData->asLatin1());
440 }
441 };
442
443 // The elements' containers are owned by the outer container, not by the elements themselves.
444 auto move = [](Forward::reference target, Forward::reference source)
445 {
446 QtCbor::Element &targetValue = target.value();
447
448 // If the target has a container, deref it before overwriting, so that we don't leak.
449 if (targetValue.flags & QtCbor::Element::IsContainer)
450 targetValue.container->deref();
451
452 // Do not move, so that we can clear the value afterwards.
453 target = source;
454
455 // Clear the source value, so that we don't store the same container twice.
456 source.value() = QtCbor::Element();
457 };
458
459 std::stable_sort(
460 first: Forward(container->elements.begin()), last: Forward(container->elements.end()),
461 comp: [&compare](const Value &a, const Value &b) { return compare(a, b) < 0; });
462
463 Forward result = customAssigningUniqueLast(
464 first: Forward(container->elements.begin()), last: Forward(container->elements.end()),
465 compare: [&compare](const Value &a, const Value &b) { return compare(a, b) == 0; }, assign: move);
466
467 container->elements.erase(abegin: result.elementsIterator(), aend: container->elements.end());
468}
469
470
471/*
472 object = begin-object [ member *( value-separator member ) ]
473 end-object
474*/
475
476bool Parser::parseObject()
477{
478 if (++nestingLevel > nestingLimit) {
479 lastError = QJsonParseError::DeepNesting;
480 return false;
481 }
482
483 BEGIN << "parseObject" << json;
484
485 char token = nextToken();
486 while (token == Quote) {
487 if (!container)
488 container = new QCborContainerPrivate;
489 if (!parseMember())
490 return false;
491 token = nextToken();
492 if (token != ValueSeparator)
493 break;
494 token = nextToken();
495 if (token == EndObject) {
496 lastError = QJsonParseError::MissingObject;
497 return false;
498 }
499 }
500
501 DEBUG << "end token=" << token;
502 if (token != EndObject) {
503 lastError = QJsonParseError::UnterminatedObject;
504 return false;
505 }
506
507 END;
508
509 --nestingLevel;
510
511 if (container)
512 sortContainer(container: container.data());
513 return true;
514}
515
516/*
517 member = string name-separator value
518*/
519bool Parser::parseMember()
520{
521 BEGIN << "parseMember";
522
523 if (!parseString())
524 return false;
525 char token = nextToken();
526 if (token != NameSeparator) {
527 lastError = QJsonParseError::MissingNameSeparator;
528 return false;
529 }
530 if (!eatSpace()) {
531 lastError = QJsonParseError::UnterminatedObject;
532 return false;
533 }
534 if (!parseValue())
535 return false;
536
537 END;
538 return true;
539}
540
541/*
542 array = begin-array [ value *( value-separator value ) ] end-array
543*/
544bool Parser::parseArray()
545{
546 BEGIN << "parseArray";
547
548 if (++nestingLevel > nestingLimit) {
549 lastError = QJsonParseError::DeepNesting;
550 return false;
551 }
552
553 if (!eatSpace()) {
554 lastError = QJsonParseError::UnterminatedArray;
555 return false;
556 }
557 if (*json == EndArray) {
558 nextToken();
559 } else {
560 while (1) {
561 if (!eatSpace()) {
562 lastError = QJsonParseError::UnterminatedArray;
563 return false;
564 }
565 if (!container)
566 container = new QCborContainerPrivate;
567 if (!parseValue())
568 return false;
569 char token = nextToken();
570 if (token == EndArray)
571 break;
572 else if (token != ValueSeparator) {
573 if (!eatSpace())
574 lastError = QJsonParseError::UnterminatedArray;
575 else
576 lastError = QJsonParseError::MissingValueSeparator;
577 return false;
578 }
579 }
580 }
581
582 DEBUG << "size =" << (container ? container->elements.length() : 0);
583 END;
584
585 --nestingLevel;
586
587 return true;
588}
589
590/*
591value = false / null / true / object / array / number / string
592
593*/
594
595bool Parser::parseValue()
596{
597 BEGIN << "parse Value" << json;
598
599 switch (*json++) {
600 case 'n':
601 if (end - json < 4) {
602 lastError = QJsonParseError::IllegalValue;
603 return false;
604 }
605 if (*json++ == 'u' &&
606 *json++ == 'l' &&
607 *json++ == 'l') {
608 container->append(v: QCborValue(QCborValue::Null));
609 DEBUG << "value: null";
610 END;
611 return true;
612 }
613 lastError = QJsonParseError::IllegalValue;
614 return false;
615 case 't':
616 if (end - json < 4) {
617 lastError = QJsonParseError::IllegalValue;
618 return false;
619 }
620 if (*json++ == 'r' &&
621 *json++ == 'u' &&
622 *json++ == 'e') {
623 container->append(v: QCborValue(true));
624 DEBUG << "value: true";
625 END;
626 return true;
627 }
628 lastError = QJsonParseError::IllegalValue;
629 return false;
630 case 'f':
631 if (end - json < 5) {
632 lastError = QJsonParseError::IllegalValue;
633 return false;
634 }
635 if (*json++ == 'a' &&
636 *json++ == 'l' &&
637 *json++ == 's' &&
638 *json++ == 'e') {
639 container->append(v: QCborValue(false));
640 DEBUG << "value: false";
641 END;
642 return true;
643 }
644 lastError = QJsonParseError::IllegalValue;
645 return false;
646 case Quote: {
647 if (!parseString())
648 return false;
649 DEBUG << "value: string";
650 END;
651 return true;
652 }
653 case BeginArray: {
654 StashedContainer stashedContainer(&container, QCborValue::Array);
655 if (!parseArray())
656 return false;
657 DEBUG << "value: array";
658 END;
659 return true;
660 }
661 case BeginObject: {
662 StashedContainer stashedContainer(&container, QCborValue::Map);
663 if (!parseObject())
664 return false;
665 DEBUG << "value: object";
666 END;
667 return true;
668 }
669 case ValueSeparator:
670 // Essentially missing value, but after a colon, not after a comma
671 // like the other MissingObject errors.
672 lastError = QJsonParseError::IllegalValue;
673 return false;
674 case EndObject:
675 case EndArray:
676 lastError = QJsonParseError::MissingObject;
677 return false;
678 default:
679 --json;
680 if (!parseNumber())
681 return false;
682 DEBUG << "value: number";
683 END;
684 }
685
686 return true;
687}
688
689
690
691
692
693/*
694 number = [ minus ] int [ frac ] [ exp ]
695 decimal-point = %x2E ; .
696 digit1-9 = %x31-39 ; 1-9
697 e = %x65 / %x45 ; e E
698 exp = e [ minus / plus ] 1*DIGIT
699 frac = decimal-point 1*DIGIT
700 int = zero / ( digit1-9 *DIGIT )
701 minus = %x2D ; -
702 plus = %x2B ; +
703 zero = %x30 ; 0
704
705*/
706
707bool Parser::parseNumber()
708{
709 BEGIN << "parseNumber" << json;
710
711 const char *start = json;
712 bool isInt = true;
713
714 // minus
715 if (json < end && *json == '-')
716 ++json;
717
718 // int = zero / ( digit1-9 *DIGIT )
719 if (json < end && *json == '0') {
720 ++json;
721 } else {
722 while (json < end && *json >= '0' && *json <= '9')
723 ++json;
724 }
725
726 // frac = decimal-point 1*DIGIT
727 if (json < end && *json == '.') {
728 isInt = false;
729 ++json;
730 while (json < end && *json >= '0' && *json <= '9')
731 ++json;
732 }
733
734 // exp = e [ minus / plus ] 1*DIGIT
735 if (json < end && (*json == 'e' || *json == 'E')) {
736 isInt = false;
737 ++json;
738 if (json < end && (*json == '-' || *json == '+'))
739 ++json;
740 while (json < end && *json >= '0' && *json <= '9')
741 ++json;
742 }
743
744 if (json >= end) {
745 lastError = QJsonParseError::TerminationByNumber;
746 return false;
747 }
748
749 const QByteArray number = QByteArray::fromRawData(start, size: json - start);
750 DEBUG << "numberstring" << number;
751
752 if (isInt) {
753 bool ok;
754 qlonglong n = number.toLongLong(ok: &ok);
755 if (ok) {
756 container->append(v: QCborValue(n));
757 END;
758 return true;
759 }
760 }
761
762 bool ok;
763 double d = number.toDouble(ok: &ok);
764
765 if (!ok) {
766 lastError = QJsonParseError::IllegalNumber;
767 return false;
768 }
769
770 qint64 n;
771 if (convertDoubleTo(v: d, value: &n))
772 container->append(v: QCborValue(n));
773 else
774 container->append(v: QCborValue(d));
775
776 END;
777 return true;
778}
779
780/*
781
782 string = quotation-mark *char quotation-mark
783
784 char = unescaped /
785 escape (
786 %x22 / ; " quotation mark U+0022
787 %x5C / ; \ reverse solidus U+005C
788 %x2F / ; / solidus U+002F
789 %x62 / ; b backspace U+0008
790 %x66 / ; f form feed U+000C
791 %x6E / ; n line feed U+000A
792 %x72 / ; r carriage return U+000D
793 %x74 / ; t tab U+0009
794 %x75 4HEXDIG ) ; uXXXX U+XXXX
795
796 escape = %x5C ; \
797
798 quotation-mark = %x22 ; "
799
800 unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
801 */
// Shifts *result left by four bits and ORs in the value of the hexadecimal
// digit. Returns false if digit is not 0-9, a-f or A-F; *result has already
// been shifted in that case.
static inline bool addHexDigit(char digit, uint *result)
{
    *result <<= 4;

    uint nibble;
    if ('0' <= digit && digit <= '9')
        nibble = digit - '0';
    else if ('a' <= digit && digit <= 'f')
        nibble = (digit - 'a') + 10;
    else if ('A' <= digit && digit <= 'F')
        nibble = (digit - 'A') + 10;
    else
        return false;

    *result |= nibble;
    return true;
}
815
816static inline bool scanEscapeSequence(const char *&json, const char *end, uint *ch)
817{
818 ++json;
819 if (json >= end)
820 return false;
821
822 DEBUG << "scan escape" << (char)*json;
823 uint escaped = *json++;
824 switch (escaped) {
825 case '"':
826 *ch = '"'; break;
827 case '\\':
828 *ch = '\\'; break;
829 case '/':
830 *ch = '/'; break;
831 case 'b':
832 *ch = 0x8; break;
833 case 'f':
834 *ch = 0xc; break;
835 case 'n':
836 *ch = 0xa; break;
837 case 'r':
838 *ch = 0xd; break;
839 case 't':
840 *ch = 0x9; break;
841 case 'u': {
842 *ch = 0;
843 if (json > end - 4)
844 return false;
845 for (int i = 0; i < 4; ++i) {
846 if (!addHexDigit(digit: *json, result: ch))
847 return false;
848 ++json;
849 }
850 return true;
851 }
852 default:
853 // this is not as strict as one could be, but allows for more Json files
854 // to be parsed correctly.
855 *ch = escaped;
856 return true;
857 }
858 return true;
859}
860
861static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
862{
863 const auto *usrc = reinterpret_cast<const uchar *>(json);
864 const auto *uend = reinterpret_cast<const uchar *>(end);
865 const uchar b = *usrc++;
866 int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst&: result, src&: usrc, end: uend);
867 if (res < 0)
868 return false;
869
870 json = reinterpret_cast<const char *>(usrc);
871 return true;
872}
873
874bool Parser::parseString()
875{
876 const char *start = json;
877
878 // try to parse a utf-8 string without escape sequences, and note whether it's 7bit ASCII.
879
880 BEGIN << "parse string" << json;
881 bool isUtf8 = true;
882 bool isAscii = true;
883 while (json < end) {
884 uint ch = 0;
885 if (*json == '"')
886 break;
887 if (*json == '\\') {
888 isAscii = false;
889 // If we find escape sequences, we store UTF-16 as there are some
890 // escape sequences which are hard to represent in UTF-8.
891 // (plain "\\ud800" for example)
892 isUtf8 = false;
893 break;
894 }
895 if (!scanUtf8Char(json, end, result: &ch)) {
896 lastError = QJsonParseError::IllegalUTF8String;
897 return false;
898 }
899 if (ch > 0x7f)
900 isAscii = false;
901 DEBUG << " " << ch << char(ch);
902 }
903 ++json;
904 DEBUG << "end of string";
905 if (json >= end) {
906 lastError = QJsonParseError::UnterminatedString;
907 return false;
908 }
909
910 // no escape sequences, we are done
911 if (isUtf8) {
912 container->appendByteData(data: start, len: json - start - 1, type: QCborValue::String,
913 extraFlags: isAscii ? QtCbor::Element::StringIsAscii
914 : QtCbor::Element::ValueFlags {});
915 END;
916 return true;
917 }
918
919 DEBUG << "has escape sequences";
920
921 json = start;
922
923 QString ucs4;
924 while (json < end) {
925 uint ch = 0;
926 if (*json == '"')
927 break;
928 else if (*json == '\\') {
929 if (!scanEscapeSequence(json, end, ch: &ch)) {
930 lastError = QJsonParseError::IllegalEscapeSequence;
931 return false;
932 }
933 } else {
934 if (!scanUtf8Char(json, end, result: &ch)) {
935 lastError = QJsonParseError::IllegalUTF8String;
936 return false;
937 }
938 }
939 if (QChar::requiresSurrogates(ucs4: ch)) {
940 ucs4.append(c: QChar::highSurrogate(ucs4: ch));
941 ucs4.append(c: QChar::lowSurrogate(ucs4: ch));
942 } else {
943 ucs4.append(c: QChar(ushort(ch)));
944 }
945 }
946 ++json;
947
948 if (json >= end) {
949 lastError = QJsonParseError::UnterminatedString;
950 return false;
951 }
952
953 container->appendByteData(data: reinterpret_cast<const char *>(ucs4.utf16()), len: ucs4.size() * 2,
954 type: QCborValue::String, extraFlags: QtCbor::Element::StringIsUtf16);
955 END;
956 return true;
957}
958
959QT_END_NAMESPACE
960

source code of qtbase/src/corelib/serialization/qjsonparser.cpp