1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4//
5// W A R N I N G
6// -------------
7//
8// This file is not part of the Qt API. It exists for the convenience
9// of other Qt classes. This header file may change from version to
10// version without notice, or even be removed.
11//
12// We mean it.
13//
14
15#include <QtCore/private/qglobal_p.h>
16#include <qstringconverter.h>
17#include <qxmlstream.h>
18#include "qxmlstreamgrammar_p.h"
19#include <QtCore/qhash.h>
20#include <QCoreApplication> // Q_DECLARE_TR_FUNCTIONS
21
22
23#include <memory>
24#include <optional>
25
26#ifndef QXMLSTREAM_P_H
27#define QXMLSTREAM_P_H
28
29QT_BEGIN_NAMESPACE
30
31namespace QtPrivate {
32
33class XmlStringRef
34{
35public:
36 const QString *m_string = nullptr;
37 qsizetype m_pos = 0;
38 qsizetype m_size = 0;
39
40 constexpr XmlStringRef() = default;
41 constexpr inline XmlStringRef(const QString *string, qsizetype pos, qsizetype length)
42 : m_string(string), m_pos(pos), m_size((Q_ASSERT(length >= 0), length))
43 {
44 }
45 XmlStringRef(const QString *string)
46 : XmlStringRef(string, 0, string->size())
47 {
48 }
49
50 operator QXmlString() const {
51 if (!m_string)
52 return QXmlString();
53 QStringPrivate d = m_string->data_ptr();
54 d.setBegin(d.data() + m_pos);
55 d.size = m_size;
56 return QXmlString(std::move(d));
57 }
58
59 void clear() { m_string = nullptr; m_pos = 0; m_size= 0; }
60 QStringView view() const { return m_string ? QStringView(m_string->data() + m_pos, m_size) : QStringView(); }
61 bool isEmpty() const { return m_size == 0; }
62 bool isNull() const { return !m_string; }
63 QString toString() const { return view().toString(); }
64
65 using value_type = QStringView::value_type;
66 using size_type = QStringView::size_type;
67 using difference_type = QStringView::difference_type;
68 using pointer = QStringView::pointer;
69 using const_pointer = QStringView::const_pointer;
70 using reference = QStringView::reference;
71 using const_reference = QStringView::const_reference;
72 using iterator = QStringView::iterator;
73 using const_iterator = QStringView::const_iterator;
74 using reverse_iterator = QStringView::reverse_iterator;
75 using const_reverse_iterator = QStringView::const_reverse_iterator;
76
77#define MAKE_MEMBER(name) \
78 auto name () const noexcept { return view(). name (); }
79 MAKE_MEMBER(data)
80 MAKE_MEMBER(size)
81 MAKE_MEMBER(empty)
82 MAKE_MEMBER(begin)
83 MAKE_MEMBER(end)
84 MAKE_MEMBER(cbegin)
85 MAKE_MEMBER(cend)
86 MAKE_MEMBER(rbegin)
87 MAKE_MEMBER(rend)
88 MAKE_MEMBER(crbegin)
89 MAKE_MEMBER(crend)
90#undef MAKE_MEMBER
91
92#define MAKE_OP(op) \
93 friend auto operator op(const XmlStringRef &lhs, const XmlStringRef &rhs) noexcept { return lhs.view() op rhs.view(); } \
94 /*end*/
95 MAKE_OP(==)
96 MAKE_OP(!=)
97 MAKE_OP(<=)
98 MAKE_OP(>=)
99 MAKE_OP(<)
100 MAKE_OP(>)
101#undef MAKE_OP
102#define MAKE_OP(op) \
103 friend auto operator op(const XmlStringRef &lhs, QStringView rhs) noexcept { return lhs.view() op rhs; } \
104 friend auto operator op(QStringView lhs, const XmlStringRef &rhs) noexcept { return lhs op rhs.view(); } \
105 /*end*/
106 MAKE_OP(==)
107 MAKE_OP(!=)
108 MAKE_OP(<=)
109 MAKE_OP(>=)
110 MAKE_OP(<)
111 MAKE_OP(>)
112#undef MAKE_OP
113};
114
115}
116
117using namespace QtPrivate;
118
119template <typename T> class QXmlStreamSimpleStack
120{
121 Q_DISABLE_COPY_MOVE(QXmlStreamSimpleStack)
122
123 T *data;
124 qsizetype tos, cap;
125public:
126 inline QXmlStreamSimpleStack()
127 : data(nullptr), tos(-1), cap(0)
128 {}
129 inline ~QXmlStreamSimpleStack()
130 {
131 if (data) {
132 std::destroy_n(data, size());
133 free(data);
134 }
135 }
136
137 inline void reserve(qsizetype extraCapacity)
138 {
139 if (tos + extraCapacity + 1 > cap) {
140 cap = qMax(a: tos + extraCapacity + 1, b: cap << 1 );
141 void *ptr = realloc(ptr: static_cast<void *>(data), size: cap * sizeof(T));
142 data = reinterpret_cast<T *>(ptr);
143 Q_CHECK_PTR(data);
144 }
145 }
146
147 inline T &push() { reserve(extraCapacity: 1); return rawPush(); }
148 inline T &rawPush() { return *new (data + (++tos)) T; }
149 inline const T &top() const { return data[tos]; }
150 inline T &top() { return data[tos]; }
151 inline T pop() { T t = std::move(data[tos]); std::destroy_at(data + tos); --tos; return t; }
152 inline T &operator[](qsizetype index) { return data[index]; }
153 inline const T &at(qsizetype index) const { return data[index]; }
154 inline qsizetype size() const { return tos + 1; }
155 inline void resize(qsizetype s) { tos = s - 1; }
156 inline bool isEmpty() const { return tos < 0; }
157 inline void clear() { tos = -1; }
158
159 using const_iterator = const T*;
160 using iterator = T*;
161 T *begin() { return data; }
162 const T *begin() const { return data; }
163 const T *cbegin() const { return begin(); }
164 T *end() { return data + size(); }
165 const T *end() const { return data + size(); }
166 const T *cend() const { return end(); }
167};
168
169class QXmlStream
170{
171 Q_DECLARE_TR_FUNCTIONS(QXmlStream)
172};
173
174class QXmlStreamPrivateTagStack {
175public:
176 struct NamespaceDeclaration
177 {
178 XmlStringRef prefix;
179 XmlStringRef namespaceUri;
180 };
181
182 struct Tag
183 {
184 XmlStringRef name;
185 XmlStringRef qualifiedName;
186 NamespaceDeclaration namespaceDeclaration;
187 qsizetype tagStackStringStorageSize;
188 qsizetype namespaceDeclarationsSize;
189 };
190
191
192 QXmlStreamPrivateTagStack();
193 QXmlStreamSimpleStack<NamespaceDeclaration> namespaceDeclarations;
194 QString tagStackStringStorage;
195 qsizetype tagStackStringStorageSize;
196 qsizetype initialTagStackStringStorageSize;
197 bool tagsDone;
198
199 XmlStringRef addToStringStorage(QAnyStringView s)
200 {
201 qsizetype pos = tagStackStringStorageSize;
202 if (pos != tagStackStringStorage.size())
203 tagStackStringStorage.resize(size: pos);
204 s.visit(v: [&](auto s) { tagStackStringStorage.append(s); });
205 qsizetype sz = (tagStackStringStorage.size() - pos);
206 tagStackStringStorageSize += sz;
207 return XmlStringRef(&tagStackStringStorage, pos, sz);
208 }
209
210 QXmlStreamSimpleStack<Tag> tagStack;
211
212
213 inline Tag tagStack_pop() {
214 Tag tag = tagStack.pop();
215 tagStackStringStorageSize = tag.tagStackStringStorageSize;
216 namespaceDeclarations.resize(s: tag.namespaceDeclarationsSize);
217 tagsDone = tagStack.isEmpty();
218 return tag;
219 }
220 inline Tag &tagStack_push() {
221 Tag &tag = tagStack.push();
222 tag.tagStackStringStorageSize = tagStackStringStorageSize;
223 tag.namespaceDeclarationsSize = namespaceDeclarations.size();
224 return tag;
225 }
226};
227
228
229class QXmlStreamEntityResolver;
230class QXmlStreamReaderPrivate : public QXmlStreamGrammar, public QXmlStreamPrivateTagStack
231{
232 QXmlStreamReader *q_ptr;
233 Q_DECLARE_PUBLIC(QXmlStreamReader)
234public:
235 QXmlStreamReaderPrivate(QXmlStreamReader *q);
236 ~QXmlStreamReaderPrivate();
237 void init();
238
239 QByteArray rawReadBuffer;
240 QByteArray dataBuffer;
241 uchar firstByte;
242 qint64 nbytesread;
243 QString readBuffer;
244 qsizetype readBufferPos;
245 QXmlStreamSimpleStack<uint> putStack;
246 struct Entity {
247 Entity() = default;
248 Entity(const QString &name, const QString &value)
249 : name(name), value(value), external(false), unparsed(false), literal(false),
250 hasBeenParsed(false), isCurrentlyReferenced(false){}
251 static inline Entity createLiteral(QLatin1StringView name, QLatin1StringView value)
252 { Entity result(name, value); result.literal = result.hasBeenParsed = true; return result; }
253 QString name, value;
254 uint external : 1;
255 uint unparsed : 1;
256 uint literal : 1;
257 uint hasBeenParsed : 1;
258 uint isCurrentlyReferenced : 1;
259 };
260 // these hash tables use a QStringView as a key to avoid creating QStrings
261 // just for lookup. The keys are usually views into Entity::name and thus
262 // are guaranteed to have the same lifetime as the referenced data:
263 QHash<QStringView, Entity> entityHash;
264 QHash<QStringView, Entity> parameterEntityHash;
265 struct QEntityReference
266 {
267 QHash<QStringView, Entity> *hash;
268 QStringView name;
269 };
270 QXmlStreamSimpleStack<QEntityReference> entityReferenceStack;
271 int entityExpansionLimit = 4096;
272 int entityLength = 0;
273 inline bool referenceEntity(QHash<QStringView, Entity> *hash, Entity &entity)
274 {
275 Q_ASSERT(hash);
276 if (entity.isCurrentlyReferenced) {
277 raiseWellFormedError(message: QXmlStream::tr(sourceText: "Self-referencing entity detected."));
278 return false;
279 }
280 // entityLength represents the amount of additional characters the
281 // entity expands into (can be negative for e.g. &amp;). It's used to
282 // avoid DoS attacks through recursive entity expansions
283 entityLength += entity.value.size() - entity.name.size() - 2;
284 if (entityLength > entityExpansionLimit) {
285 raiseWellFormedError(message: QXmlStream::tr(sourceText: "Entity expands to more characters than the entity expansion limit."));
286 return false;
287 }
288 entity.isCurrentlyReferenced = true;
289 entityReferenceStack.push() = { .hash: hash, .name: entity.name };
290 injectToken(tokenToInject: ENTITY_DONE);
291 return true;
292 }
293
294
295 QIODevice *device;
296 bool deleteDevice;
297 QStringDecoder decoder;
298 bool atEnd;
299
300 enum class XmlContext
301 {
302 Prolog,
303 Body,
304 };
305
306 XmlContext currentContext = XmlContext::Prolog;
307 bool foundDTD = false;
308 bool isValidToken(QXmlStreamReader::TokenType type);
309 void checkToken();
310
311 /*!
312 \sa setType()
313 */
314 QXmlStreamReader::TokenType type;
315 QXmlStreamReader::Error error;
316 QString errorString;
317 QString unresolvedEntity;
318
319 qint64 lineNumber, lastLineStart, characterOffset;
320
321
322 void write(const QString &);
323 void write(const char *);
324
325
326 QXmlStreamAttributes attributes;
327 XmlStringRef namespaceForPrefix(QStringView prefix);
328 void resolveTag();
329 void resolvePublicNamespaces();
330 void resolveDtd();
331 uint resolveCharRef(int symbolIndex);
332 bool checkStartDocument();
333 void startDocument();
334 void parseError();
335 void checkPublicLiteral(QStringView publicId);
336
337 bool scanDtd;
338 XmlStringRef lastAttributeValue;
339 bool lastAttributeIsCData;
340 struct DtdAttribute {
341 XmlStringRef tagName;
342 XmlStringRef attributeQualifiedName;
343 XmlStringRef attributePrefix;
344 XmlStringRef attributeName;
345 XmlStringRef defaultValue;
346 bool isCDATA;
347 bool isNamespaceAttribute;
348 };
349 QXmlStreamSimpleStack<DtdAttribute> dtdAttributes;
350 struct NotationDeclaration {
351 XmlStringRef name;
352 XmlStringRef publicId;
353 XmlStringRef systemId;
354 };
355 QXmlStreamSimpleStack<NotationDeclaration> notationDeclarations;
356 QXmlStreamNotationDeclarations publicNotationDeclarations;
357 QXmlStreamNamespaceDeclarations publicNamespaceDeclarations;
358
359 struct EntityDeclaration {
360 XmlStringRef name;
361 XmlStringRef notationName;
362 XmlStringRef publicId;
363 XmlStringRef systemId;
364 XmlStringRef value;
365 bool parameter;
366 bool external;
367 inline void clear() {
368 name.clear();
369 notationName.clear();
370 publicId.clear();
371 systemId.clear();
372 value.clear();
373 parameter = external = false;
374 }
375 };
376 QXmlStreamSimpleStack<EntityDeclaration> entityDeclarations;
377 QXmlStreamEntityDeclarations publicEntityDeclarations;
378
379 XmlStringRef text;
380
381 XmlStringRef prefix, namespaceUri, qualifiedName, name;
382 XmlStringRef processingInstructionTarget, processingInstructionData;
383 XmlStringRef dtdName, dtdPublicId, dtdSystemId;
384 XmlStringRef documentVersion, documentEncoding;
385 uint isEmptyElement : 1;
386 uint isWhitespace : 1;
387 uint isCDATA : 1;
388 uint standalone : 1;
389 uint hasCheckedStartDocument : 1;
390 uint normalizeLiterals : 1;
391 uint hasSeenTag : 1;
392 uint inParseEntity : 1;
393 uint referenceToUnparsedEntityDetected : 1;
394 uint referenceToParameterEntityDetected : 1;
395 uint hasExternalDtdSubset : 1;
396 uint lockEncoding : 1;
397 uint namespaceProcessing : 1;
398 uint hasStandalone : 1; // TODO: expose in public API
399
400 int resumeReduction;
401 void resume(int rule);
402
403 inline bool entitiesMustBeDeclared() const {
404 return (!inParseEntity
405 && (standalone
406 || (!referenceToUnparsedEntityDetected
407 && !referenceToParameterEntityDetected // Errata 13 as of 2006-04-25
408 && !hasExternalDtdSubset)));
409 }
410
411 // qlalr parser
412 int tos;
413 int stack_size;
414 struct Value {
415 qsizetype pos; // offset into textBuffer
416 qsizetype len; // length incl. prefix (if any)
417 qint16 prefix; // prefix of a name (as in "prefix:name") limited to 4k in fastScanName()
418 ushort c;
419 };
420
421 Value *sym_stack;
422 int *state_stack;
423 inline void reallocateStack();
424 inline Value &sym(int index) const
425 { return sym_stack[tos + index - 1]; }
426 QString textBuffer;
427 inline void clearTextBuffer() {
428 if (!scanDtd) {
429 textBuffer.resize(size: 0);
430 textBuffer.reserve(asize: 256);
431 }
432 }
433 struct Attribute {
434 Value key;
435 Value value;
436 };
437 QXmlStreamSimpleStack<Attribute> attributeStack;
438
439 inline XmlStringRef symString(int index) {
440 const Value &symbol = sym(index);
441 return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix, symbol.len - symbol.prefix);
442 }
443 QStringView symView(int index) const
444 {
445 const Value &symbol = sym(index);
446 return QStringView(textBuffer.data() + symbol.pos, symbol.len).mid(pos: symbol.prefix);
447 }
448 inline XmlStringRef symName(int index) {
449 const Value &symbol = sym(index);
450 return XmlStringRef(&textBuffer, symbol.pos, symbol.len);
451 }
452 inline XmlStringRef symString(int index, int offset) {
453 const Value &symbol = sym(index);
454 return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix + offset, symbol.len - symbol.prefix - offset);
455 }
456 inline XmlStringRef symPrefix(int index) {
457 const Value &symbol = sym(index);
458 if (symbol.prefix)
459 return XmlStringRef(&textBuffer, symbol.pos, symbol.prefix - 1);
460 return XmlStringRef();
461 }
462 inline XmlStringRef symString(const Value &symbol) {
463 return XmlStringRef(&textBuffer, symbol.pos + symbol.prefix, symbol.len - symbol.prefix);
464 }
465 inline XmlStringRef symName(const Value &symbol) {
466 return XmlStringRef(&textBuffer, symbol.pos, symbol.len);
467 }
468 inline XmlStringRef symPrefix(const Value &symbol) {
469 if (symbol.prefix)
470 return XmlStringRef(&textBuffer, symbol.pos, symbol.prefix - 1);
471 return XmlStringRef();
472 }
473
474 inline void clearSym() { Value &val = sym(index: 1); val.pos = textBuffer.size(); val.len = 0; }
475
476
477 short token;
478 uint token_char;
479
480 uint filterCarriageReturn();
481 inline uint getChar();
482 inline uint peekChar();
483 inline void putChar(uint c) { putStack.push() = c; }
484 inline void putChar(QChar c) { putStack.push() = c.unicode(); }
485 void putString(QStringView s, qsizetype from = 0);
486 void putStringLiteral(QStringView s);
487 void putReplacement(QStringView s);
488 void putReplacementInAttributeValue(QStringView s);
489 uint getChar_helper();
490
491 bool scanUntil(const char *str, short tokenToInject = -1);
492 bool scanString(const char *str, short tokenToInject, bool requireSpace = true);
493 inline void injectToken(ushort tokenToInject) {
494 putChar(c: int(tokenToInject) << 16);
495 }
496
497 QString resolveUndeclaredEntity(const QString &name);
498 void parseEntity(const QString &value);
499 std::unique_ptr<QXmlStreamReaderPrivate> entityParser;
500
501 bool scanAfterLangleBang();
502 bool scanPublicOrSystem();
503 bool scanNData();
504 bool scanAfterDefaultDecl();
505 bool scanAttType();
506
507
508 // scan optimization functions. Not strictly necessary but LALR is
509 // not very well suited for scanning fast
510 qsizetype fastScanLiteralContent();
511 qsizetype fastScanSpace();
512 qsizetype fastScanContentCharList();
513 std::optional<qsizetype> fastScanName(Value *val = nullptr);
514 inline qsizetype fastScanNMTOKEN();
515
516
517 bool parse();
518 inline void consumeRule(int);
519
520 void raiseError(QXmlStreamReader::Error error, const QString& message = QString());
521 void raiseWellFormedError(const QString &message);
522 void raiseNamePrefixTooLongError();
523
524 QXmlStreamEntityResolver *entityResolver;
525
526private:
527 /*! \internal
528 Never assign to variable type directly. Instead use this function.
529
530 This prevents errors from being ignored.
531 */
532 inline void setType(const QXmlStreamReader::TokenType t)
533 {
534 if (type != QXmlStreamReader::Invalid)
535 type = t;
536 }
537};
538
539QT_END_NAMESPACE
540
541#endif // QXMLSTREAM_P_H
542
543

// Source: qtbase/src/corelib/serialization/qxmlstream_p.h