1// Copyright (C) 2019 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include <QtXml/qtxmlglobal.h>
5
6#ifndef QT_NO_DOM
7
8#include "qdomhelpers_p.h"
9#include "qdom_p.h"
10#include "qxmlstream.h"
11#include "private/qxmlstream_p.h"
12
13#include <memory>
14#include <stack>
15
16QT_BEGIN_NAMESPACE
17
18using namespace Qt::StringLiterals;
19
20/**************************************************************
21 *
22 * QDomBuilder
23 *
24 **************************************************************/
25
26QDomBuilder::QDomBuilder(QDomDocumentPrivate *d, QXmlStreamReader *r,
27 QDomDocument::ParseOptions options)
28 : doc(d), node(d), reader(r), parseOptions(options)
29{
30 Q_ASSERT(doc);
31 Q_ASSERT(reader);
32}
33
34QDomBuilder::~QDomBuilder() {}
35
36bool QDomBuilder::endDocument()
37{
38 // ### is this really necessary? (rms)
39 if (node != doc)
40 return false;
41 return true;
42}
43
44bool QDomBuilder::startDTD(const QString &name, const QString &publicId, const QString &systemId)
45{
46 doc->doctype()->name = name;
47 doc->doctype()->publicId = publicId;
48 doc->doctype()->systemId = systemId;
49 return true;
50}
51
52QString QDomBuilder::dtdInternalSubset(const QString &dtd)
53{
54 // https://www.w3.org/TR/xml/#NT-intSubset
55 // doctypedecl: '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
56 const QString &name = doc->doctype()->name;
57 QStringView tmp = QStringView(dtd).sliced(pos: dtd.indexOf(s: name) + name.size());
58
59 const QString &publicId = doc->doctype()->publicId;
60 if (!publicId.isEmpty())
61 tmp = tmp.sliced(pos: tmp.indexOf(s: publicId) + publicId.size());
62
63 const QString &systemId = doc->doctype()->systemId;
64 if (!systemId.isEmpty())
65 tmp = tmp.sliced(pos: tmp.indexOf(s: systemId) + systemId.size());
66
67 const qsizetype obra = tmp.indexOf(c: u'[');
68 const qsizetype cbra = tmp.lastIndexOf(c: u']');
69 if (obra >= 0 && cbra >= 0)
70 return tmp.left(n: cbra).sliced(pos: obra + 1).toString();
71
72 return QString();
73}
74
75bool QDomBuilder::parseDTD(const QString &dtd)
76{
77 doc->doctype()->internalSubset = dtdInternalSubset(dtd);
78 return true;
79}
80
81bool QDomBuilder::startElement(const QString &nsURI, const QString &qName,
82 const QXmlStreamAttributes &atts)
83{
84 const bool nsProcessing =
85 parseOptions.testFlag(flag: QDomDocument::ParseOption::UseNamespaceProcessing);
86 QDomNodePrivate *n =
87 nsProcessing ? doc->createElementNS(nsURI, qName) : doc->createElement(tagName: qName);
88 if (!n)
89 return false;
90
91 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
92
93 node->appendChild(newChild: n);
94 node = n;
95
96 // attributes
97 for (const auto &attr : atts) {
98 auto domElement = static_cast<QDomElementPrivate *>(node);
99 if (nsProcessing) {
100 domElement->setAttributeNS(nsURI: attr.namespaceUri().toString(),
101 qName: attr.qualifiedName().toString(),
102 newValue: attr.value().toString());
103 } else {
104 domElement->setAttribute(name: attr.qualifiedName().toString(),
105 value: attr.value().toString());
106 }
107 }
108
109 return true;
110}
111
112bool QDomBuilder::endElement()
113{
114 if (!node || node == doc)
115 return false;
116 node = node->parent();
117
118 return true;
119}
120
121bool QDomBuilder::characters(const QString &characters, bool cdata)
122{
123 // No text as child of some document
124 if (node == doc)
125 return false;
126
127 std::unique_ptr<QDomNodePrivate> n;
128 if (cdata) {
129 n.reset(p: doc->createCDATASection(data: characters));
130 } else if (!entityName.isEmpty()) {
131 auto e = std::make_unique<QDomEntityPrivate>(
132 args&: doc, args: nullptr, args&: entityName, args: QString(), args: QString(), args: QString());
133 e->value = characters;
134 e->ref.deref();
135 doc->doctype()->appendChild(newChild: e.get());
136 Q_UNUSED(e.release());
137 n.reset(p: doc->createEntityReference(name: entityName));
138 } else {
139 n.reset(p: doc->createTextNode(data: characters));
140 }
141 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
142 node->appendChild(newChild: n.get());
143 Q_UNUSED(n.release());
144
145 return true;
146}
147
148bool QDomBuilder::processingInstruction(const QString &target, const QString &data)
149{
150 QDomNodePrivate *n;
151 n = doc->createProcessingInstruction(target, data);
152 if (n) {
153 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
154 node->appendChild(newChild: n);
155 return true;
156 } else
157 return false;
158}
159
160bool QDomBuilder::skippedEntity(const QString &name)
161{
162 QDomNodePrivate *n = doc->createEntityReference(name);
163 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
164 node->appendChild(newChild: n);
165 return true;
166}
167
168void QDomBuilder::fatalError(const QString &message)
169{
170 parseResult.errorMessage = message;
171 parseResult.errorLine = reader->lineNumber();
172 parseResult.errorColumn = reader->columnNumber();
173}
174
175bool QDomBuilder::startEntity(const QString &name)
176{
177 entityName = name;
178 return true;
179}
180
181bool QDomBuilder::endEntity()
182{
183 entityName.clear();
184 return true;
185}
186
187bool QDomBuilder::comment(const QString &characters)
188{
189 QDomNodePrivate *n;
190 n = doc->createComment(data: characters);
191 n->setLocation(lineNumber: int(reader->lineNumber()), columnNumber: int(reader->columnNumber()));
192 node->appendChild(newChild: n);
193 return true;
194}
195
196bool QDomBuilder::unparsedEntityDecl(const QString &name, const QString &publicId,
197 const QString &systemId, const QString &notationName)
198{
199 QDomEntityPrivate *e =
200 new QDomEntityPrivate(doc, nullptr, name, publicId, systemId, notationName);
201 // keep the refcount balanced: appendChild() does a ref anyway.
202 e->ref.deref();
203 doc->doctype()->appendChild(newChild: e);
204 return true;
205}
206
207bool QDomBuilder::externalEntityDecl(const QString &name, const QString &publicId,
208 const QString &systemId)
209{
210 return unparsedEntityDecl(name, publicId, systemId, notationName: QString());
211}
212
213bool QDomBuilder::notationDecl(const QString &name, const QString &publicId,
214 const QString &systemId)
215{
216 QDomNotationPrivate *n = new QDomNotationPrivate(doc, nullptr, name, publicId, systemId);
217 // keep the refcount balanced: appendChild() does a ref anyway.
218 n->ref.deref();
219 doc->doctype()->appendChild(newChild: n);
220 return true;
221}
222
223/**************************************************************
224 *
225 * QDomParser
226 *
227 **************************************************************/
228
229QDomParser::QDomParser(QDomDocumentPrivate *d, QXmlStreamReader *r,
230 QDomDocument::ParseOptions options)
231 : reader(r), domBuilder(d, r, options)
232{
233}
234
235bool QDomParser::parse()
236{
237 return parseProlog() && parseBody();
238}
239
240bool QDomParser::parseProlog()
241{
242 Q_ASSERT(reader);
243
244 bool foundDtd = false;
245
246 while (!reader->atEnd()) {
247 reader->readNext();
248
249 if (reader->hasError()) {
250 domBuilder.fatalError(message: reader->errorString());
251 return false;
252 }
253
254 switch (reader->tokenType()) {
255 case QXmlStreamReader::StartDocument:
256 if (!reader->documentVersion().isEmpty()) {
257 QString value(u"version='"_s);
258 value += reader->documentVersion();
259 value += u'\'';
260 if (!reader->documentEncoding().isEmpty()) {
261 value += u" encoding='"_s;
262 value += reader->documentEncoding();
263 value += u'\'';
264 }
265 if (reader->isStandaloneDocument()) {
266 value += u" standalone='yes'"_s;
267 } else {
268 // Add the standalone attribute only if it was specified
269 if (reader->hasStandaloneDeclaration())
270 value += u" standalone='no'"_s;
271 }
272
273 if (!domBuilder.processingInstruction(target: u"xml"_s, data: value)) {
274 domBuilder.fatalError(
275 message: QDomParser::tr(sourceText: "Error occurred while processing XML declaration"));
276 return false;
277 }
278 }
279 break;
280 case QXmlStreamReader::DTD:
281 if (foundDtd) {
282 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Multiple DTD sections are not allowed"));
283 return false;
284 }
285 foundDtd = true;
286
287 if (!domBuilder.startDTD(name: reader->dtdName().toString(),
288 publicId: reader->dtdPublicId().toString(),
289 systemId: reader->dtdSystemId().toString())) {
290 domBuilder.fatalError(
291 message: QDomParser::tr(sourceText: "Error occurred while processing document type declaration"));
292 return false;
293 }
294 if (!domBuilder.parseDTD(dtd: reader->text().toString()))
295 return false;
296 if (!parseMarkupDecl())
297 return false;
298 break;
299 case QXmlStreamReader::Comment:
300 if (!domBuilder.comment(characters: reader->text().toString())) {
301 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Error occurred while processing comment"));
302 return false;
303 }
304 break;
305 case QXmlStreamReader::ProcessingInstruction:
306 if (!domBuilder.processingInstruction(target: reader->processingInstructionTarget().toString(),
307 data: reader->processingInstructionData().toString())) {
308 domBuilder.fatalError(
309 message: QDomParser::tr(sourceText: "Error occurred while processing a processing instruction"));
310 return false;
311 }
312 break;
313 default:
314 // If the token is none of the above, prolog processing is done.
315 return true;
316 }
317 }
318
319 return true;
320}
321
322bool QDomParser::parseBody()
323{
324 Q_ASSERT(reader);
325
326 std::stack<QString> tagStack;
327 while (!reader->atEnd() && !reader->hasError()) {
328 switch (reader->tokenType()) {
329 case QXmlStreamReader::StartElement:
330 tagStack.push(x: reader->qualifiedName().toString());
331 if (!domBuilder.startElement(nsURI: reader->namespaceUri().toString(),
332 qName: reader->qualifiedName().toString(),
333 atts: reader->attributes())) {
334 domBuilder.fatalError(
335 message: QDomParser::tr(sourceText: "Error occurred while processing a start element"));
336 return false;
337 }
338 break;
339 case QXmlStreamReader::EndElement:
340 if (tagStack.empty() || reader->qualifiedName() != tagStack.top()) {
341 domBuilder.fatalError(
342 message: QDomParser::tr(sourceText: "Unexpected end element '%1'").arg(a: reader->name()));
343 return false;
344 }
345 tagStack.pop();
346 if (!domBuilder.endElement()) {
347 domBuilder.fatalError(
348 message: QDomParser::tr(sourceText: "Error occurred while processing an end element"));
349 return false;
350 }
351 break;
352 case QXmlStreamReader::Characters:
353 // Skip the content if it contains only spacing characters,
354 // unless it's CDATA or PreserveSpacingOnlyNodes was specified.
355 if (reader->isCDATA() || domBuilder.preserveSpacingOnlyNodes()
356 || !(reader->isWhitespace() || reader->text().trimmed().isEmpty())) {
357 if (!domBuilder.characters(characters: reader->text().toString(), cdata: reader->isCDATA())) {
358 domBuilder.fatalError(
359 message: QDomParser::tr(sourceText: "Error occurred while processing the element content"));
360 return false;
361 }
362 }
363 break;
364 case QXmlStreamReader::Comment:
365 if (!domBuilder.comment(characters: reader->text().toString())) {
366 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Error occurred while processing comments"));
367 return false;
368 }
369 break;
370 case QXmlStreamReader::ProcessingInstruction:
371 if (!domBuilder.processingInstruction(target: reader->processingInstructionTarget().toString(),
372 data: reader->processingInstructionData().toString())) {
373 domBuilder.fatalError(
374 message: QDomParser::tr(sourceText: "Error occurred while processing a processing instruction"));
375 return false;
376 }
377 break;
378 case QXmlStreamReader::EntityReference:
379 if (!domBuilder.skippedEntity(name: reader->name().toString())) {
380 domBuilder.fatalError(
381 message: QDomParser::tr(sourceText: "Error occurred while processing an entity reference"));
382 return false;
383 }
384 break;
385 default:
386 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Unexpected token"));
387 return false;
388 }
389
390 reader->readNext();
391 }
392
393 if (reader->hasError()) {
394 domBuilder.fatalError(message: reader->errorString());
395 reader->readNext();
396 return false;
397 }
398
399 if (!tagStack.empty()) {
400 domBuilder.fatalError(message: QDomParser::tr(sourceText: "Tag mismatch"));
401 return false;
402 }
403
404 return true;
405}
406
407bool QDomParser::parseMarkupDecl()
408{
409 Q_ASSERT(reader);
410
411 const auto entities = reader->entityDeclarations();
412 for (const auto &entityDecl : entities) {
413 // Entity declarations are created only for External Entities. Internal Entities
414 // are parsed, and QXmlStreamReader handles the parsing itself and returns the
415 // parsed result. So we don't need to do anything for the Internal Entities.
416 if (!entityDecl.publicId().isEmpty() || !entityDecl.systemId().isEmpty()) {
417 // External Entity
418 if (!domBuilder.unparsedEntityDecl(name: entityDecl.name().toString(),
419 publicId: entityDecl.publicId().toString(),
420 systemId: entityDecl.systemId().toString(),
421 notationName: entityDecl.notationName().toString())) {
422 domBuilder.fatalError(
423 message: QDomParser::tr(sourceText: "Error occurred while processing entity declaration"));
424 return false;
425 }
426 }
427 }
428
429 const auto notations = reader->notationDeclarations();
430 for (const auto &notationDecl : notations) {
431 if (!domBuilder.notationDecl(name: notationDecl.name().toString(),
432 publicId: notationDecl.publicId().toString(),
433 systemId: notationDecl.systemId().toString())) {
434 domBuilder.fatalError(
435 message: QDomParser::tr(sourceText: "Error occurred while processing notation declaration"));
436 return false;
437 }
438 }
439
440 return true;
441}
442
443QT_END_NAMESPACE
444
445#endif // QT_NO_DOM
446

source code of qtbase/src/xml/dom/qdomhelpers.cpp