1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2016 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#ifndef QJSON_P_H
42#define QJSON_P_H
43
44//
45// W A R N I N G
46// -------------
47//
48// This file is not part of the Qt API. It exists purely as an
49// implementation detail. This header file may change from version to
50// version without notice, or even be removed.
51//
52// We mean it.
53//
54
55#include <qjsonobject.h>
56#include <qjsonvalue.h>
57#include <qjsondocument.h>
58#include <qjsonarray.h>
59#include <qatomic.h>
60#include <qstring.h>
61#include <qendian.h>
62#include <qnumeric.h>
63
64#include "private/qendian_p.h"
65#include "private/qsimd_p.h"
66
67#include <limits.h>
68#include <limits>
69#include <type_traits>
70
71QT_BEGIN_NAMESPACE
72
73// in qstring.cpp
74void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len);
75void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept;
76
77/*
78 This defines a binary data structure for Json data. The data structure is optimised for fast reading
79 and minimum allocations. The whole data structure can be mmap'ed and used directly.
80
81 In most cases the binary structure is not as space efficient as a utf8 encoded text representation, but
82 much faster to access.
83
84 The size requirements are:
85
86 String:
87 Latin1 data: 2 bytes header + string.length()
88 Full Unicode: 4 bytes header + 2*(string.length())
89
90 Values: 4 bytes + size of data (size can be 0 for some data)
91 bool: 0 bytes
92 double: 8 bytes (0 if integer with less than 27bits)
93 string: see above
94 array: size of array
95 object: size of object
96 Array: 12 bytes + 4*length + size of Value data
97 Object: 12 bytes + 8*length + size of Key Strings + size of Value data
98
99 For an example such as
100
101 { // object: 12 + 5*8 = 52
102 "firstName": "John", // key 12, value 8 = 20
103 "lastName" : "Smith", // key 12, value 8 = 20
104 "age" : 25, // key 8, value 0 = 8
105 "address" : // key 12, object below = 140
106 { // object: 12 + 4*8
107 "streetAddress": "21 2nd Street", // key 16, value 16
108 "city" : "New York", // key 8, value 12
109 "state" : "NY", // key 8, value 4
110 "postalCode" : "10021" // key 12, value 8
111 }, // object total: 128
112 "phoneNumber": // key: 16, value array below = 172
113 [ // array: 12 + 2*4 + values below: 156
114 { // object 12 + 2*8
115 "type" : "home", // key 8, value 8
116 "number": "212 555-1234" // key 8, value 16
117 }, // object total: 68
118 { // object 12 + 2*8
119 "type" : "fax", // key 8, value 8
120 "number": "646 555-4567" // key 8, value 16
121 } // object total: 68
122 ] // array total: 156
123 } // great total: 412 bytes
124
125 The uncompressed text file used roughly 500 bytes, so in this case we end up using about
126 the same space as the text representation.
127
128 Other measurements have shown a slightly bigger binary size than a compact text
129 representation where all possible whitespace was stripped out.
130*/
// Declares Qt type info for a class that lives in QJsonPrivate. The macro is
// meant to be used from inside that namespace: it closes the namespace,
// invokes Q_DECLARE_TYPEINFO on the qualified name and re-opens the namespace.
#define Q_DECLARE_JSONPRIVATE_TYPEINFO(Class, Flags) \
    } \
    Q_DECLARE_TYPEINFO(QJsonPrivate::Class, Flags); \
    namespace QJsonPrivate {
132namespace QJsonPrivate {
133
134class Array;
135class Object;
136class Value;
137class Entry;
138
139template<typename T>
140using q_littleendian = QLEInteger<T>;
141
142typedef q_littleendian<short> qle_short;
143typedef q_littleendian<unsigned short> qle_ushort;
144typedef q_littleendian<int> qle_int;
145typedef q_littleendian<unsigned int> qle_uint;
146
147template<int pos, int width>
148using qle_bitfield = QLEIntegerBitfield<uint, pos, width>;
149
150template<int pos, int width>
151using qle_signedbitfield = QLEIntegerBitfield<int, pos, width>;
152
153typedef qle_uint offset;
154
// Rounds size up to the next multiple of four bytes (values that already are
// a multiple of four are returned unchanged).
inline int alignedSize(int size)
{
    const int mask = 3;
    return (size + mask) & ~mask;
}
157
158const int MaxLatin1Length = 0x7fff;
159
160static inline bool useCompressed(QStringView s)
161{
162 if (s.length() > MaxLatin1Length)
163 return false;
164 return QtPrivate::isLatin1(s);
165}
166
167static inline bool useCompressed(QLatin1String s)
168{
169 return s.size() <= MaxLatin1Length;
170}
171
172template <typename T>
173static inline int qStringSize(T string, bool compress)
174{
175 int l = 2 + string.size();
176 if (!compress)
177 l *= 2;
178 return alignedSize(l);
179}
180
// Converts d to an int if d is a whole number whose binary exponent lies in
// [0, 25], i.e. 1 <= |d| < 2^26, so the result fits a signed 27 bit field.
// Returns INT_MAX for everything else, including 0, fractions, infinities
// and NaN, to signal that the value cannot be stored inline.
static inline int compressedNumber(double d)
{
    // this relies on details of how ieee floats are represented
    const int mantissaBits = 52;
    const quint64 mantissaMask = 0x000fffffffffffffull;
    const quint64 exponentMask = 0x7ff0000000000000ull;

    quint64 bits;
    memcpy(&bits, &d, sizeof(double));

    const int exponent = (int)((bits & exponentMask) >> mantissaBits) - 1023;
    if (exponent < 0 || exponent > 25)
        return INT_MAX;

    // Any mantissa bit below the binary point means d has a fractional part.
    if (bits & (mantissaMask >> exponent))
        return INT_MAX;

    const bool negative = (bits >> 63) != 0;
    // Restore the implicit leading one, then drop the (all zero) fraction bits.
    const quint64 significand = (bits & mantissaMask) | ((quint64)1 << 52);
    const int magnitude = (int)(significand >> (52 - exponent));
    return negative ? -magnitude : magnitude;
}
205
206class Latin1String;
207
208class String
209{
210public:
211 explicit String(const char *data) { d = (Data *)data; }
212
213 struct Data {
214 qle_uint length;
215 qle_ushort utf16[1];
216 };
217
218 Data *d;
219
220 int byteSize() const { return sizeof(uint) + sizeof(ushort) * d->length; }
221 bool isValid(int maxSize) const {
222 // Check byteSize() <= maxSize, avoiding integer overflow
223 maxSize -= sizeof(uint);
224 return maxSize >= 0 && uint(d->length) <= maxSize / sizeof(ushort);
225 }
226
227 inline String &operator=(QStringView str)
228 {
229 d->length = str.length();
230 qToLittleEndian<quint16>(str.utf16(), str.length(), d->utf16);
231 fillTrailingZeros();
232 return *this;
233 }
234
235 inline String &operator=(QLatin1String str)
236 {
237 d->length = str.size();
238#if Q_BYTE_ORDER == Q_BIG_ENDIAN
239 for (int i = 0; i < str.size(); ++i)
240 d->utf16[i] = str[i].unicode();
241#else
242 qt_from_latin1((ushort *)d->utf16, str.data(), str.size());
243#endif
244 fillTrailingZeros();
245 return *this;
246 }
247
248 void fillTrailingZeros()
249 {
250 if (d->length & 1)
251 d->utf16[d->length] = 0;
252 }
253
254 inline bool operator ==(QStringView str) const {
255 int slen = str.length();
256 int l = d->length;
257 if (slen != l)
258 return false;
259 const ushort *s = (const ushort *)str.utf16();
260 const qle_ushort *a = d->utf16;
261 const ushort *b = s;
262 while (l-- && *a == *b)
263 a++,b++;
264 return (l == -1);
265 }
266 inline bool operator !=(QStringView str) const {
267 return !operator ==(str);
268 }
269 inline bool operator >=(QStringView str) const {
270 // ###
271 return toString() >= str;
272 }
273
274 inline bool operator<(const Latin1String &str) const;
275 inline bool operator>=(const Latin1String &str) const { return !operator <(str); }
276 inline bool operator ==(const Latin1String &str) const;
277
278 inline bool operator ==(const String &str) const {
279 if (d->length != str.d->length)
280 return false;
281 return !memcmp(d->utf16, str.d->utf16, d->length*sizeof(ushort));
282 }
283 inline bool operator<(const String &other) const;
284 inline bool operator >=(const String &other) const { return !(*this < other); }
285
286 inline QString toString() const {
287#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
288 return QString((QChar *)d->utf16, d->length);
289#else
290 int l = d->length;
291 QString str(l, Qt::Uninitialized);
292 QChar *ch = str.data();
293 for (int i = 0; i < l; ++i)
294 ch[i] = QChar(d->utf16[i]);
295 return str;
296#endif
297 }
298
299};
300
301class Latin1String
302{
303public:
304 explicit Latin1String(const char *data) { d = (Data *)data; }
305
306 struct Data {
307 qle_ushort length;
308 char latin1[1];
309 };
310 Data *d;
311
312 int byteSize() const { return sizeof(ushort) + sizeof(char)*(d->length); }
313 bool isValid(int maxSize) const {
314 return byteSize() <= maxSize;
315 }
316
317 inline Latin1String &operator=(QStringView str)
318 {
319 int len = d->length = str.length();
320 uchar *l = (uchar *)d->latin1;
321 const ushort *uc = (const ushort *)str.utf16();
322 qt_to_latin1_unchecked(l, uc, len);
323
324 fillTrailingZeros();
325 return *this;
326 }
327
328 inline Latin1String &operator=(QLatin1String str)
329 {
330 int len = d->length = str.size();
331 uchar *l = (uchar *)d->latin1;
332 memcpy(l, str.data(), len);
333
334 fillTrailingZeros();
335 return *this;
336 }
337
338 void fillTrailingZeros()
339 {
340 uchar *l = (uchar *)d->latin1;
341 for (int len = d->length; (quintptr)(l + len) & 0x3; ++len)
342 l[len] = 0;
343 }
344
345 QLatin1String toQLatin1String() const noexcept {
346 return QLatin1String(d->latin1, d->length);
347 }
348
349 inline bool operator<(const String &str) const
350 {
351 const qle_ushort *uc = (qle_ushort *) str.d->utf16;
352 if (!uc || *uc == 0)
353 return false;
354
355 const uchar *c = (uchar *)d->latin1;
356 const uchar *e = c + qMin((int)d->length, (int)str.d->length);
357
358 while (c < e) {
359 if (*c != *uc)
360 break;
361 ++c;
362 ++uc;
363 }
364 return (c == e ? (int)d->length < (int)str.d->length : *c < *uc);
365
366 }
367 inline bool operator ==(const String &str) const {
368 return (str == *this);
369 }
370 inline bool operator >=(const String &str) const {
371 return !(*this < str);
372 }
373
374 inline QString toString() const {
375 return QString::fromLatin1(d->latin1, d->length);
376 }
377};
378
379#define DEF_OP(op) \
380 inline bool operator op(Latin1String lhs, Latin1String rhs) noexcept \
381 { \
382 return lhs.toQLatin1String() op rhs.toQLatin1String(); \
383 } \
384 inline bool operator op(QLatin1String lhs, Latin1String rhs) noexcept \
385 { \
386 return lhs op rhs.toQLatin1String(); \
387 } \
388 inline bool operator op(Latin1String lhs, QLatin1String rhs) noexcept \
389 { \
390 return lhs.toQLatin1String() op rhs; \
391 } \
392 inline bool operator op(QStringView lhs, Latin1String rhs) noexcept \
393 { \
394 return lhs op rhs.toQLatin1String(); \
395 } \
396 inline bool operator op(Latin1String lhs, QStringView rhs) noexcept \
397 { \
398 return lhs.toQLatin1String() op rhs; \
399 } \
400 /*end*/
401DEF_OP(==)
402DEF_OP(!=)
403DEF_OP(< )
404DEF_OP(> )
405DEF_OP(<=)
406DEF_OP(>=)
407#undef DEF_OP
408
409inline bool String::operator ==(const Latin1String &str) const
410{
411 if ((int)d->length != (int)str.d->length)
412 return false;
413 const qle_ushort *uc = d->utf16;
414 const qle_ushort *e = uc + d->length;
415 const uchar *c = (uchar *)str.d->latin1;
416
417 while (uc < e) {
418 if (*uc != *c)
419 return false;
420 ++uc;
421 ++c;
422 }
423 return true;
424}
425
426inline bool String::operator <(const String &other) const
427{
428 int alen = d->length;
429 int blen = other.d->length;
430 int l = qMin(alen, blen);
431 qle_ushort *a = d->utf16;
432 qle_ushort *b = other.d->utf16;
433
434 while (l-- && *a == *b)
435 a++,b++;
436 if (l==-1)
437 return (alen < blen);
438 return (ushort)*a < (ushort)*b;
439}
440
441inline bool String::operator<(const Latin1String &str) const
442{
443 const uchar *c = (uchar *) str.d->latin1;
444 if (!c || *c == 0)
445 return false;
446
447 const qle_ushort *uc = d->utf16;
448 const qle_ushort *e = uc + qMin((int)d->length, (int)str.d->length);
449
450 while (uc < e) {
451 if (*uc != *c)
452 break;
453 ++uc;
454 ++c;
455 }
456 return (uc == e ? (int)d->length < (int)str.d->length : (ushort)*uc < *c);
457
458}
459
460template <typename T>
461static inline void copyString(char *dest, T str, bool compress)
462{
463 if (compress) {
464 Latin1String string(dest);
465 string = str;
466 } else {
467 String string(dest);
468 string = str;
469 }
470}
471
472
473/*
474 Base is the base class for both Object and Array. Both classes work more or less the same way.
 The class starts with a header (defined by the struct below), followed by the data (the data for
 values in the Array case and Entry's (see below) for objects).
477
478 After the data a table follows (tableOffset points to it) containing Value objects for Arrays, and
479 offsets from the beginning of the object to Entry's in the case of Object.
480
481 Entry's in the Object's table are lexicographically sorted by key in the table(). This allows the usage
482 of a binary search over the keys in an Object.
483 */
484class Base
485{
486public:
487 qle_uint size;
488 union {
489 uint _dummy;
490 qle_bitfield<0, 1> is_object;
491 qle_bitfield<1, 31> length;
492 };
493 offset tableOffset;
494 // content follows here
495
496 inline bool isObject() const { return !!is_object; }
497 inline bool isArray() const { return !isObject(); }
498
499 inline offset *table() const { return (offset *) (((char *) this) + tableOffset); }
500
501 int reserveSpace(uint dataSize, int posInTable, uint numItems, bool replace);
502 void removeItems(int pos, int numItems);
503};
504
505class Object : public Base
506{
507public:
508 Entry *entryAt(int i) const {
509 return reinterpret_cast<Entry *>(((char *)this) + table()[i]);
510 }
511 int indexOf(QStringView key, bool *exists) const;
512 int indexOf(QLatin1String key, bool *exists) const;
513
514 bool isValid(int maxSize) const;
515};
516
517
518class Array : public Base
519{
520public:
521 inline Value at(int i) const;
522 inline Value &operator [](int i);
523
524 bool isValid(int maxSize) const;
525};
526
527
528class Value
529{
530public:
531 enum {
532 MaxSize = (1<<27) - 1
533 };
534 union {
535 uint _dummy;
536 qle_bitfield<0, 3> type;
537 qle_bitfield<3, 1> latinOrIntValue;
538 qle_bitfield<4, 1> latinKey;
539 qle_bitfield<5, 27> value;
540 qle_signedbitfield<5, 27> int_value;
541 };
542
543 inline char *data(const Base *b) const { return ((char *)b) + value; }
544 int usedStorage(const Base *b) const;
545
546 bool toBoolean() const;
547 double toDouble(const Base *b) const;
548 QString toString(const Base *b) const;
549 String asString(const Base *b) const;
550 Latin1String asLatin1String(const Base *b) const;
551 Base *base(const Base *b) const;
552
553 bool isValid(const Base *b) const;
554
555 static int requiredStorage(QJsonValue &v, bool *compressed);
556 static uint valueToStore(const QJsonValue &v, uint offset);
557 static void copyData(const QJsonValue &v, char *dest, bool compressed);
558};
559Q_DECLARE_JSONPRIVATE_TYPEINFO(Value, Q_PRIMITIVE_TYPE)
560
561inline Value Array::at(int i) const
562{
563 return *(Value *) (table() + i);
564}
565
566inline Value &Array::operator [](int i)
567{
568 return *(Value *) (table() + i);
569}
570
571
572
573class Entry {
574public:
575 Value value;
576 // key
577 // value data follows key
578
579 uint size() const {
580 int s = sizeof(Entry);
581 if (value.latinKey)
582 s += shallowLatin1Key().byteSize();
583 else
584 s += shallowKey().byteSize();
585 return alignedSize(s);
586 }
587
588 int usedStorage(Base *b) const {
589 return size() + value.usedStorage(b);
590 }
591
592 String shallowKey() const
593 {
594 Q_ASSERT(!value.latinKey);
595 return String((const char *)this + sizeof(Entry));
596 }
597 Latin1String shallowLatin1Key() const
598 {
599 Q_ASSERT(value.latinKey);
600 return Latin1String((const char *)this + sizeof(Entry));
601 }
602 QString key() const
603 {
604 if (value.latinKey) {
605 return shallowLatin1Key().toString();
606 }
607 return shallowKey().toString();
608 }
609
610 bool isValid(int maxSize) const {
611 if (maxSize < (int)sizeof(Entry))
612 return false;
613 maxSize -= sizeof(Entry);
614 if (value.latinKey)
615 return shallowLatin1Key().isValid(maxSize);
616 return shallowKey().isValid(maxSize);
617 }
618
619 bool operator ==(QStringView key) const;
620 inline bool operator !=(QStringView key) const { return !operator ==(key); }
621 inline bool operator >=(QStringView key) const;
622
623 bool operator==(QLatin1String key) const;
624 inline bool operator!=(QLatin1String key) const { return !operator ==(key); }
625 inline bool operator>=(QLatin1String key) const;
626
627 bool operator ==(const Entry &other) const;
628 bool operator >=(const Entry &other) const;
629};
630
631inline bool Entry::operator >=(QStringView key) const
632{
633 if (value.latinKey)
634 return (shallowLatin1Key() >= key);
635 else
636 return (shallowKey() >= key);
637}
638
639inline bool Entry::operator >=(QLatin1String key) const
640{
641 if (value.latinKey)
642 return shallowLatin1Key() >= key;
643 else
644 return shallowKey() >= QString(key); // ### conversion to QString
645}
646
647inline bool operator <(QStringView key, const Entry &e)
648{ return e >= key; }
649
650inline bool operator<(QLatin1String key, const Entry &e)
651{ return e >= key; }
652
653
654class Header {
655public:
656 qle_uint tag; // 'qbjs'
657 qle_uint version; // 1
658 Base *root() { return (Base *)(this + 1); }
659};
660
661
662inline bool Value::toBoolean() const
663{
664 Q_ASSERT(type == QJsonValue::Bool);
665 return value != 0;
666}
667
668inline double Value::toDouble(const Base *b) const
669{
670 Q_ASSERT(type == QJsonValue::Double);
671 if (latinOrIntValue)
672 return int_value;
673
674 quint64 i = qFromLittleEndian<quint64>((const uchar *)b + value);
675 double d;
676 memcpy(&d, &i, sizeof(double));
677 return d;
678}
679
680inline String Value::asString(const Base *b) const
681{
682 Q_ASSERT(type == QJsonValue::String && !latinOrIntValue);
683 return String(data(b));
684}
685
686inline Latin1String Value::asLatin1String(const Base *b) const
687{
688 Q_ASSERT(type == QJsonValue::String && latinOrIntValue);
689 return Latin1String(data(b));
690}
691
692inline QString Value::toString(const Base *b) const
693{
694 if (latinOrIntValue)
695 return asLatin1String(b).toString();
696 else
697 return asString(b).toString();
698}
699
700inline Base *Value::base(const Base *b) const
701{
702 Q_ASSERT(type == QJsonValue::Array || type == QJsonValue::Object);
703 return reinterpret_cast<Base *>(data(b));
704}
705
706class Data {
707public:
708 enum Validation {
709 Unchecked,
710 Validated,
711 Invalid
712 };
713
714 QAtomicInt ref;
715 int alloc;
716 union {
717 char *rawData;
718 Header *header;
719 };
720 uint compactionCounter : 31;
721 uint ownsData : 1;
722
723 inline Data(char *raw, int a)
724 : alloc(a), rawData(raw), compactionCounter(0), ownsData(true)
725 {
726 }
727 inline Data(int reserved, QJsonValue::Type valueType)
728 : rawData(nullptr), compactionCounter(0), ownsData(true)
729 {
730 Q_ASSERT(valueType == QJsonValue::Array || valueType == QJsonValue::Object);
731
732 alloc = sizeof(Header) + sizeof(Base) + reserved + sizeof(offset);
733 header = (Header *)malloc(alloc);
734 Q_CHECK_PTR(header);
735 header->tag = QJsonDocument::BinaryFormatTag;
736 header->version = 1;
737 Base *b = header->root();
738 b->size = sizeof(Base);
739 b->is_object = (valueType == QJsonValue::Object);
740 b->tableOffset = sizeof(Base);
741 b->length = 0;
742 }
743 inline ~Data()
744 { if (ownsData) free(rawData); }
745
746 uint offsetOf(const void *ptr) const { return (uint)(((char *)ptr - rawData)); }
747
748 QJsonObject toObject(Object *o) const
749 {
750 return QJsonObject(const_cast<Data *>(this), o);
751 }
752
753 QJsonArray toArray(Array *a) const
754 {
755 return QJsonArray(const_cast<Data *>(this), a);
756 }
757
758 Data *clone(Base *b, int reserve = 0)
759 {
760 int size = sizeof(Header) + b->size;
761 if (b == header->root() && ref.loadRelaxed() == 1 && alloc >= size + reserve)
762 return this;
763
764 if (reserve) {
765 if (reserve < 128)
766 reserve = 128;
767 size = qMax(size + reserve, qMin(size *2, (int)Value::MaxSize));
768 if (size > Value::MaxSize) {
769 qWarning("QJson: Document too large to store in data structure");
770 return nullptr;
771 }
772 }
773 char *raw = (char *)malloc(size);
774 Q_CHECK_PTR(raw);
775 memcpy(raw + sizeof(Header), b, b->size);
776 Header *h = (Header *)raw;
777 h->tag = QJsonDocument::BinaryFormatTag;
778 h->version = 1;
779 Data *d = new Data(raw, size);
780 d->compactionCounter = (b == header->root()) ? compactionCounter : 0;
781 return d;
782 }
783
784 void compact();
785 bool valid() const;
786
787private:
788 Q_DISABLE_COPY_MOVE(Data)
789};
790
791}
792
793QT_END_NAMESPACE
794
795#endif // QJSON_P_H
796