1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qtexthtmlparser_p.h"
41
42#include <qbytearray.h>
43#include <qstack.h>
44#include <qdebug.h>
45#include <qthread.h>
46#include <qguiapplication.h>
47
48#include "qtextdocument.h"
49#include "qtextformat_p.h"
50#include "qtextdocument_p.h"
51#include "qtextcursor.h"
52#include "qfont_p.h"
53
54#include <algorithm>
55
56#ifndef QT_NO_TEXTHTMLPARSER
57
58QT_BEGIN_NAMESPACE
59
60// see also tst_qtextdocumentfragment.cpp
61#define MAX_ENTITY 258
62static const struct QTextHtmlEntity { const char name[9]; quint16 code; } entities[]= {
63 { .name: "AElig", .code: 0x00c6 },
64 { .name: "AMP", .code: 38 },
65 { .name: "Aacute", .code: 0x00c1 },
66 { .name: "Acirc", .code: 0x00c2 },
67 { .name: "Agrave", .code: 0x00c0 },
68 { .name: "Alpha", .code: 0x0391 },
69 { .name: "Aring", .code: 0x00c5 },
70 { .name: "Atilde", .code: 0x00c3 },
71 { .name: "Auml", .code: 0x00c4 },
72 { .name: "Beta", .code: 0x0392 },
73 { .name: "Ccedil", .code: 0x00c7 },
74 { .name: "Chi", .code: 0x03a7 },
75 { .name: "Dagger", .code: 0x2021 },
76 { .name: "Delta", .code: 0x0394 },
77 { .name: "ETH", .code: 0x00d0 },
78 { .name: "Eacute", .code: 0x00c9 },
79 { .name: "Ecirc", .code: 0x00ca },
80 { .name: "Egrave", .code: 0x00c8 },
81 { .name: "Epsilon", .code: 0x0395 },
82 { .name: "Eta", .code: 0x0397 },
83 { .name: "Euml", .code: 0x00cb },
84 { .name: "GT", .code: 62 },
85 { .name: "Gamma", .code: 0x0393 },
86 { .name: "Iacute", .code: 0x00cd },
87 { .name: "Icirc", .code: 0x00ce },
88 { .name: "Igrave", .code: 0x00cc },
89 { .name: "Iota", .code: 0x0399 },
90 { .name: "Iuml", .code: 0x00cf },
91 { .name: "Kappa", .code: 0x039a },
92 { .name: "LT", .code: 60 },
93 { .name: "Lambda", .code: 0x039b },
94 { .name: "Mu", .code: 0x039c },
95 { .name: "Ntilde", .code: 0x00d1 },
96 { .name: "Nu", .code: 0x039d },
97 { .name: "OElig", .code: 0x0152 },
98 { .name: "Oacute", .code: 0x00d3 },
99 { .name: "Ocirc", .code: 0x00d4 },
100 { .name: "Ograve", .code: 0x00d2 },
101 { .name: "Omega", .code: 0x03a9 },
102 { .name: "Omicron", .code: 0x039f },
103 { .name: "Oslash", .code: 0x00d8 },
104 { .name: "Otilde", .code: 0x00d5 },
105 { .name: "Ouml", .code: 0x00d6 },
106 { .name: "Phi", .code: 0x03a6 },
107 { .name: "Pi", .code: 0x03a0 },
108 { .name: "Prime", .code: 0x2033 },
109 { .name: "Psi", .code: 0x03a8 },
110 { .name: "QUOT", .code: 34 },
111 { .name: "Rho", .code: 0x03a1 },
112 { .name: "Scaron", .code: 0x0160 },
113 { .name: "Sigma", .code: 0x03a3 },
114 { .name: "THORN", .code: 0x00de },
115 { .name: "Tau", .code: 0x03a4 },
116 { .name: "Theta", .code: 0x0398 },
117 { .name: "Uacute", .code: 0x00da },
118 { .name: "Ucirc", .code: 0x00db },
119 { .name: "Ugrave", .code: 0x00d9 },
120 { .name: "Upsilon", .code: 0x03a5 },
121 { .name: "Uuml", .code: 0x00dc },
122 { .name: "Xi", .code: 0x039e },
123 { .name: "Yacute", .code: 0x00dd },
124 { .name: "Yuml", .code: 0x0178 },
125 { .name: "Zeta", .code: 0x0396 },
126 { .name: "aacute", .code: 0x00e1 },
127 { .name: "acirc", .code: 0x00e2 },
128 { .name: "acute", .code: 0x00b4 },
129 { .name: "aelig", .code: 0x00e6 },
130 { .name: "agrave", .code: 0x00e0 },
131 { .name: "alefsym", .code: 0x2135 },
132 { .name: "alpha", .code: 0x03b1 },
133 { .name: "amp", .code: 38 },
134 { .name: "and", .code: 0x22a5 },
135 { .name: "ang", .code: 0x2220 },
136 { .name: "apos", .code: 0x0027 },
137 { .name: "aring", .code: 0x00e5 },
138 { .name: "asymp", .code: 0x2248 },
139 { .name: "atilde", .code: 0x00e3 },
140 { .name: "auml", .code: 0x00e4 },
141 { .name: "bdquo", .code: 0x201e },
142 { .name: "beta", .code: 0x03b2 },
143 { .name: "brvbar", .code: 0x00a6 },
144 { .name: "bull", .code: 0x2022 },
145 { .name: "cap", .code: 0x2229 },
146 { .name: "ccedil", .code: 0x00e7 },
147 { .name: "cedil", .code: 0x00b8 },
148 { .name: "cent", .code: 0x00a2 },
149 { .name: "chi", .code: 0x03c7 },
150 { .name: "circ", .code: 0x02c6 },
151 { .name: "clubs", .code: 0x2663 },
152 { .name: "cong", .code: 0x2245 },
153 { .name: "copy", .code: 0x00a9 },
154 { .name: "crarr", .code: 0x21b5 },
155 { .name: "cup", .code: 0x222a },
156 { .name: "curren", .code: 0x00a4 },
157 { .name: "dArr", .code: 0x21d3 },
158 { .name: "dagger", .code: 0x2020 },
159 { .name: "darr", .code: 0x2193 },
160 { .name: "deg", .code: 0x00b0 },
161 { .name: "delta", .code: 0x03b4 },
162 { .name: "diams", .code: 0x2666 },
163 { .name: "divide", .code: 0x00f7 },
164 { .name: "eacute", .code: 0x00e9 },
165 { .name: "ecirc", .code: 0x00ea },
166 { .name: "egrave", .code: 0x00e8 },
167 { .name: "empty", .code: 0x2205 },
168 { .name: "emsp", .code: 0x2003 },
169 { .name: "ensp", .code: 0x2002 },
170 { .name: "epsilon", .code: 0x03b5 },
171 { .name: "equiv", .code: 0x2261 },
172 { .name: "eta", .code: 0x03b7 },
173 { .name: "eth", .code: 0x00f0 },
174 { .name: "euml", .code: 0x00eb },
175 { .name: "euro", .code: 0x20ac },
176 { .name: "exist", .code: 0x2203 },
177 { .name: "fnof", .code: 0x0192 },
178 { .name: "forall", .code: 0x2200 },
179 { .name: "frac12", .code: 0x00bd },
180 { .name: "frac14", .code: 0x00bc },
181 { .name: "frac34", .code: 0x00be },
182 { .name: "frasl", .code: 0x2044 },
183 { .name: "gamma", .code: 0x03b3 },
184 { .name: "ge", .code: 0x2265 },
185 { .name: "gt", .code: 62 },
186 { .name: "hArr", .code: 0x21d4 },
187 { .name: "harr", .code: 0x2194 },
188 { .name: "hearts", .code: 0x2665 },
189 { .name: "hellip", .code: 0x2026 },
190 { .name: "iacute", .code: 0x00ed },
191 { .name: "icirc", .code: 0x00ee },
192 { .name: "iexcl", .code: 0x00a1 },
193 { .name: "igrave", .code: 0x00ec },
194 { .name: "image", .code: 0x2111 },
195 { .name: "infin", .code: 0x221e },
196 { .name: "int", .code: 0x222b },
197 { .name: "iota", .code: 0x03b9 },
198 { .name: "iquest", .code: 0x00bf },
199 { .name: "isin", .code: 0x2208 },
200 { .name: "iuml", .code: 0x00ef },
201 { .name: "kappa", .code: 0x03ba },
202 { .name: "lArr", .code: 0x21d0 },
203 { .name: "lambda", .code: 0x03bb },
204 { .name: "lang", .code: 0x2329 },
205 { .name: "laquo", .code: 0x00ab },
206 { .name: "larr", .code: 0x2190 },
207 { .name: "lceil", .code: 0x2308 },
208 { .name: "ldquo", .code: 0x201c },
209 { .name: "le", .code: 0x2264 },
210 { .name: "lfloor", .code: 0x230a },
211 { .name: "lowast", .code: 0x2217 },
212 { .name: "loz", .code: 0x25ca },
213 { .name: "lrm", .code: 0x200e },
214 { .name: "lsaquo", .code: 0x2039 },
215 { .name: "lsquo", .code: 0x2018 },
216 { .name: "lt", .code: 60 },
217 { .name: "macr", .code: 0x00af },
218 { .name: "mdash", .code: 0x2014 },
219 { .name: "micro", .code: 0x00b5 },
220 { .name: "middot", .code: 0x00b7 },
221 { .name: "minus", .code: 0x2212 },
222 { .name: "mu", .code: 0x03bc },
223 { .name: "nabla", .code: 0x2207 },
224 { .name: "nbsp", .code: 0x00a0 },
225 { .name: "ndash", .code: 0x2013 },
226 { .name: "ne", .code: 0x2260 },
227 { .name: "ni", .code: 0x220b },
228 { .name: "not", .code: 0x00ac },
229 { .name: "notin", .code: 0x2209 },
230 { .name: "nsub", .code: 0x2284 },
231 { .name: "ntilde", .code: 0x00f1 },
232 { .name: "nu", .code: 0x03bd },
233 { .name: "oacute", .code: 0x00f3 },
234 { .name: "ocirc", .code: 0x00f4 },
235 { .name: "oelig", .code: 0x0153 },
236 { .name: "ograve", .code: 0x00f2 },
237 { .name: "oline", .code: 0x203e },
238 { .name: "omega", .code: 0x03c9 },
239 { .name: "omicron", .code: 0x03bf },
240 { .name: "oplus", .code: 0x2295 },
241 { .name: "or", .code: 0x22a6 },
242 { .name: "ordf", .code: 0x00aa },
243 { .name: "ordm", .code: 0x00ba },
244 { .name: "oslash", .code: 0x00f8 },
245 { .name: "otilde", .code: 0x00f5 },
246 { .name: "otimes", .code: 0x2297 },
247 { .name: "ouml", .code: 0x00f6 },
248 { .name: "para", .code: 0x00b6 },
249 { .name: "part", .code: 0x2202 },
250 { .name: "percnt", .code: 0x0025 },
251 { .name: "permil", .code: 0x2030 },
252 { .name: "perp", .code: 0x22a5 },
253 { .name: "phi", .code: 0x03c6 },
254 { .name: "pi", .code: 0x03c0 },
255 { .name: "piv", .code: 0x03d6 },
256 { .name: "plusmn", .code: 0x00b1 },
257 { .name: "pound", .code: 0x00a3 },
258 { .name: "prime", .code: 0x2032 },
259 { .name: "prod", .code: 0x220f },
260 { .name: "prop", .code: 0x221d },
261 { .name: "psi", .code: 0x03c8 },
262 { .name: "quot", .code: 34 },
263 { .name: "rArr", .code: 0x21d2 },
264 { .name: "radic", .code: 0x221a },
265 { .name: "rang", .code: 0x232a },
266 { .name: "raquo", .code: 0x00bb },
267 { .name: "rarr", .code: 0x2192 },
268 { .name: "rceil", .code: 0x2309 },
269 { .name: "rdquo", .code: 0x201d },
270 { .name: "real", .code: 0x211c },
271 { .name: "reg", .code: 0x00ae },
272 { .name: "rfloor", .code: 0x230b },
273 { .name: "rho", .code: 0x03c1 },
274 { .name: "rlm", .code: 0x200f },
275 { .name: "rsaquo", .code: 0x203a },
276 { .name: "rsquo", .code: 0x2019 },
277 { .name: "sbquo", .code: 0x201a },
278 { .name: "scaron", .code: 0x0161 },
279 { .name: "sdot", .code: 0x22c5 },
280 { .name: "sect", .code: 0x00a7 },
281 { .name: "shy", .code: 0x00ad },
282 { .name: "sigma", .code: 0x03c3 },
283 { .name: "sigmaf", .code: 0x03c2 },
284 { .name: "sim", .code: 0x223c },
285 { .name: "spades", .code: 0x2660 },
286 { .name: "sub", .code: 0x2282 },
287 { .name: "sube", .code: 0x2286 },
288 { .name: "sum", .code: 0x2211 },
289 { .name: "sup", .code: 0x2283 },
290 { .name: "sup1", .code: 0x00b9 },
291 { .name: "sup2", .code: 0x00b2 },
292 { .name: "sup3", .code: 0x00b3 },
293 { .name: "supe", .code: 0x2287 },
294 { .name: "szlig", .code: 0x00df },
295 { .name: "tau", .code: 0x03c4 },
296 { .name: "there4", .code: 0x2234 },
297 { .name: "theta", .code: 0x03b8 },
298 { .name: "thetasym", .code: 0x03d1 },
299 { .name: "thinsp", .code: 0x2009 },
300 { .name: "thorn", .code: 0x00fe },
301 { .name: "tilde", .code: 0x02dc },
302 { .name: "times", .code: 0x00d7 },
303 { .name: "trade", .code: 0x2122 },
304 { .name: "uArr", .code: 0x21d1 },
305 { .name: "uacute", .code: 0x00fa },
306 { .name: "uarr", .code: 0x2191 },
307 { .name: "ucirc", .code: 0x00fb },
308 { .name: "ugrave", .code: 0x00f9 },
309 { .name: "uml", .code: 0x00a8 },
310 { .name: "upsih", .code: 0x03d2 },
311 { .name: "upsilon", .code: 0x03c5 },
312 { .name: "uuml", .code: 0x00fc },
313 { .name: "weierp", .code: 0x2118 },
314 { .name: "xi", .code: 0x03be },
315 { .name: "yacute", .code: 0x00fd },
316 { .name: "yen", .code: 0x00a5 },
317 { .name: "yuml", .code: 0x00ff },
318 { .name: "zeta", .code: 0x03b6 },
319 { .name: "zwj", .code: 0x200d },
320 { .name: "zwnj", .code: 0x200c }
321};
322Q_STATIC_ASSERT(MAX_ENTITY == sizeof entities / sizeof *entities);
323
#if defined(Q_CC_MSVC) && _MSC_VER < 1600
// Entity-versus-entity ordering by name. Only compiled for MSVC versions
// older than _MSC_VER 1600.
bool operator<(const QTextHtmlEntity &entity1, const QTextHtmlEntity &entity2)
{
    const QLatin1String lhs(entity1.name);
    const QLatin1String rhs(entity2.name);
    return lhs < rhs;
}
#endif
330
331static bool operator<(const QStringRef &entityStr, const QTextHtmlEntity &entity)
332{
333 return entityStr < QLatin1String(entity.name);
334}
335
336static bool operator<(const QTextHtmlEntity &entity, const QStringRef &entityStr)
337{
338 return QLatin1String(entity.name) < entityStr;
339}
340
341static QChar resolveEntity(const QStringRef &entity)
342{
343 const QTextHtmlEntity *start = &entities[0];
344 const QTextHtmlEntity *end = &entities[MAX_ENTITY];
345 const QTextHtmlEntity *e = std::lower_bound(first: start, last: end, val: entity);
346 if (e == end || (entity < *e))
347 return QChar();
348 return e->code;
349}
350
351static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = {
352 0x20ac, // 0x80
353 0x0081, // 0x81 direct mapping
354 0x201a, // 0x82
355 0x0192, // 0x83
356 0x201e, // 0x84
357 0x2026, // 0x85
358 0x2020, // 0x86
359 0x2021, // 0x87
360 0x02C6, // 0x88
361 0x2030, // 0x89
362 0x0160, // 0x8A
363 0x2039, // 0x8B
364 0x0152, // 0x8C
365 0x008D, // 0x8D direct mapping
366 0x017D, // 0x8E
367 0x008F, // 0x8F directmapping
368 0x0090, // 0x90 directmapping
369 0x2018, // 0x91
370 0x2019, // 0x92
371 0x201C, // 0x93
372 0X201D, // 0x94
373 0x2022, // 0x95
374 0x2013, // 0x96
375 0x2014, // 0x97
376 0x02DC, // 0x98
377 0x2122, // 0x99
378 0x0161, // 0x9A
379 0x203A, // 0x9B
380 0x0153, // 0x9C
381 0x009D, // 0x9D direct mapping
382 0x017E, // 0x9E
383 0x0178 // 0x9F
384};
385
386// the displayMode value is according to the what are blocks in the piecetable, not
387// what the w3c defines.
388static const QTextHtmlElement elements[Html_NumElements]= {
389 { .name: "a", .id: Html_a, .displayMode: QTextHtmlElement::DisplayInline },
390 { .name: "address", .id: Html_address, .displayMode: QTextHtmlElement::DisplayInline },
391 { .name: "b", .id: Html_b, .displayMode: QTextHtmlElement::DisplayInline },
392 { .name: "big", .id: Html_big, .displayMode: QTextHtmlElement::DisplayInline },
393 { .name: "blockquote", .id: Html_blockquote, .displayMode: QTextHtmlElement::DisplayBlock },
394 { .name: "body", .id: Html_body, .displayMode: QTextHtmlElement::DisplayBlock },
395 { .name: "br", .id: Html_br, .displayMode: QTextHtmlElement::DisplayInline },
396 { .name: "caption", .id: Html_caption, .displayMode: QTextHtmlElement::DisplayBlock },
397 { .name: "center", .id: Html_center, .displayMode: QTextHtmlElement::DisplayBlock },
398 { .name: "cite", .id: Html_cite, .displayMode: QTextHtmlElement::DisplayInline },
399 { .name: "code", .id: Html_code, .displayMode: QTextHtmlElement::DisplayInline },
400 { .name: "dd", .id: Html_dd, .displayMode: QTextHtmlElement::DisplayBlock },
401 { .name: "dfn", .id: Html_dfn, .displayMode: QTextHtmlElement::DisplayInline },
402 { .name: "div", .id: Html_div, .displayMode: QTextHtmlElement::DisplayBlock },
403 { .name: "dl", .id: Html_dl, .displayMode: QTextHtmlElement::DisplayBlock },
404 { .name: "dt", .id: Html_dt, .displayMode: QTextHtmlElement::DisplayBlock },
405 { .name: "em", .id: Html_em, .displayMode: QTextHtmlElement::DisplayInline },
406 { .name: "font", .id: Html_font, .displayMode: QTextHtmlElement::DisplayInline },
407 { .name: "h1", .id: Html_h1, .displayMode: QTextHtmlElement::DisplayBlock },
408 { .name: "h2", .id: Html_h2, .displayMode: QTextHtmlElement::DisplayBlock },
409 { .name: "h3", .id: Html_h3, .displayMode: QTextHtmlElement::DisplayBlock },
410 { .name: "h4", .id: Html_h4, .displayMode: QTextHtmlElement::DisplayBlock },
411 { .name: "h5", .id: Html_h5, .displayMode: QTextHtmlElement::DisplayBlock },
412 { .name: "h6", .id: Html_h6, .displayMode: QTextHtmlElement::DisplayBlock },
413 { .name: "head", .id: Html_head, .displayMode: QTextHtmlElement::DisplayNone },
414 { .name: "hr", .id: Html_hr, .displayMode: QTextHtmlElement::DisplayBlock },
415 { .name: "html", .id: Html_html, .displayMode: QTextHtmlElement::DisplayInline },
416 { .name: "i", .id: Html_i, .displayMode: QTextHtmlElement::DisplayInline },
417 { .name: "img", .id: Html_img, .displayMode: QTextHtmlElement::DisplayInline },
418 { .name: "kbd", .id: Html_kbd, .displayMode: QTextHtmlElement::DisplayInline },
419 { .name: "li", .id: Html_li, .displayMode: QTextHtmlElement::DisplayBlock },
420 { .name: "link", .id: Html_link, .displayMode: QTextHtmlElement::DisplayNone },
421 { .name: "meta", .id: Html_meta, .displayMode: QTextHtmlElement::DisplayNone },
422 { .name: "nobr", .id: Html_nobr, .displayMode: QTextHtmlElement::DisplayInline },
423 { .name: "ol", .id: Html_ol, .displayMode: QTextHtmlElement::DisplayBlock },
424 { .name: "p", .id: Html_p, .displayMode: QTextHtmlElement::DisplayBlock },
425 { .name: "pre", .id: Html_pre, .displayMode: QTextHtmlElement::DisplayBlock },
426 { .name: "qt", .id: Html_body /*deliberate mapping*/, .displayMode: QTextHtmlElement::DisplayBlock },
427 { .name: "s", .id: Html_s, .displayMode: QTextHtmlElement::DisplayInline },
428 { .name: "samp", .id: Html_samp, .displayMode: QTextHtmlElement::DisplayInline },
429 { .name: "script", .id: Html_script, .displayMode: QTextHtmlElement::DisplayNone },
430 { .name: "small", .id: Html_small, .displayMode: QTextHtmlElement::DisplayInline },
431 { .name: "span", .id: Html_span, .displayMode: QTextHtmlElement::DisplayInline },
432 { .name: "strong", .id: Html_strong, .displayMode: QTextHtmlElement::DisplayInline },
433 { .name: "style", .id: Html_style, .displayMode: QTextHtmlElement::DisplayNone },
434 { .name: "sub", .id: Html_sub, .displayMode: QTextHtmlElement::DisplayInline },
435 { .name: "sup", .id: Html_sup, .displayMode: QTextHtmlElement::DisplayInline },
436 { .name: "table", .id: Html_table, .displayMode: QTextHtmlElement::DisplayTable },
437 { .name: "tbody", .id: Html_tbody, .displayMode: QTextHtmlElement::DisplayTable },
438 { .name: "td", .id: Html_td, .displayMode: QTextHtmlElement::DisplayBlock },
439 { .name: "tfoot", .id: Html_tfoot, .displayMode: QTextHtmlElement::DisplayTable },
440 { .name: "th", .id: Html_th, .displayMode: QTextHtmlElement::DisplayBlock },
441 { .name: "thead", .id: Html_thead, .displayMode: QTextHtmlElement::DisplayTable },
442 { .name: "title", .id: Html_title, .displayMode: QTextHtmlElement::DisplayNone },
443 { .name: "tr", .id: Html_tr, .displayMode: QTextHtmlElement::DisplayTable },
444 { .name: "tt", .id: Html_tt, .displayMode: QTextHtmlElement::DisplayInline },
445 { .name: "u", .id: Html_u, .displayMode: QTextHtmlElement::DisplayInline },
446 { .name: "ul", .id: Html_ul, .displayMode: QTextHtmlElement::DisplayBlock },
447 { .name: "var", .id: Html_var, .displayMode: QTextHtmlElement::DisplayInline },
448};
449
450static bool operator<(const QString &str, const QTextHtmlElement &e)
451{
452 return str < QLatin1String(e.name);
453}
454
455static bool operator<(const QTextHtmlElement &e, const QString &str)
456{
457 return QLatin1String(e.name) < str;
458}
459
460static const QTextHtmlElement *lookupElementHelper(const QString &element)
461{
462 const QTextHtmlElement *start = &elements[0];
463 const QTextHtmlElement *end = &elements[Html_NumElements];
464 const QTextHtmlElement *e = std::lower_bound(first: start, last: end, val: element);
465 if ((e == end) || (element < *e))
466 return nullptr;
467 return e;
468}
469
470int QTextHtmlParser::lookupElement(const QString &element)
471{
472 const QTextHtmlElement *e = lookupElementHelper(element);
473 if (!e)
474 return -1;
475 return e->id;
476}
477
478// quotes newlines as "\\n"
479static QString quoteNewline(const QString &s)
480{
481 QString n = s;
482 n.replace(c: QLatin1Char('\n'), after: QLatin1String("\\n"));
483 return n;
484}
485
486QTextHtmlParserNode::QTextHtmlParserNode()
487 : parent(0), id(Html_unknown),
488 cssFloat(QTextFrameFormat::InFlow), hasOwnListStyle(false), hasOwnLineHeightType(false), hasLineHeightMultiplier(false),
489 hasCssListIndent(false), isEmptyParagraph(false), isTextFrame(false), isRootFrame(false),
490 displayMode(QTextHtmlElement::DisplayInline), hasHref(false),
491 listStyle(QTextListFormat::ListStyleUndefined), imageWidth(-1), imageHeight(-1), tableBorder(0),
492 tableCellRowSpan(1), tableCellColSpan(1), tableCellSpacing(2), tableCellPadding(0),
493 borderBrush(Qt::darkGray), borderStyle(QTextFrameFormat::BorderStyle_Outset),
494 borderCollapse(false),
495 userState(-1), cssListIndent(0), wsm(WhiteSpaceModeUndefined)
496{
497 margin[QTextHtmlParser::MarginLeft] = 0;
498 margin[QTextHtmlParser::MarginRight] = 0;
499 margin[QTextHtmlParser::MarginTop] = 0;
500 margin[QTextHtmlParser::MarginBottom] = 0;
501
502 for (int i = 0; i < 4; ++i) {
503 tableCellBorderStyle[i] = QTextFrameFormat::BorderStyle_None;
504 tableCellBorder[i] = 0;
505 tableCellBorderBrush[i] = Qt::NoBrush;
506 }
507}
508
509void QTextHtmlParser::dumpHtml()
510{
511 for (int i = 0; i < count(); ++i) {
512 qDebug().nospace() << qPrintable(QString(depth(i)*4, QLatin1Char(' ')))
513 << qPrintable(at(i).tag) << ':'
514 << quoteNewline(s: at(i).text);
515 ;
516 }
517}
518
519QTextHtmlParserNode *QTextHtmlParser::newNode(int parent)
520{
521 QTextHtmlParserNode *lastNode = &nodes.last();
522 QTextHtmlParserNode *newNode = nullptr;
523
524 bool reuseLastNode = true;
525
526 if (nodes.count() == 1) {
527 reuseLastNode = false;
528 } else if (lastNode->tag.isEmpty()) {
529
530 if (lastNode->text.isEmpty()) {
531 reuseLastNode = true;
532 } else { // last node is a text node (empty tag) with some text
533
534 if (lastNode->text.length() == 1 && lastNode->text.at(i: 0).isSpace()) {
535
536 int lastSibling = count() - 2;
537 while (lastSibling
538 && at(i: lastSibling).parent != lastNode->parent
539 && at(i: lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
540 lastSibling = at(i: lastSibling).parent;
541 }
542
543 if (at(i: lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
544 reuseLastNode = false;
545 } else {
546 reuseLastNode = true;
547 }
548 } else {
549 // text node with real (non-whitespace) text -> nothing to re-use
550 reuseLastNode = false;
551 }
552
553 }
554
555 } else {
556 // last node had a proper tag -> nothing to re-use
557 reuseLastNode = false;
558 }
559
560 if (reuseLastNode) {
561 newNode = lastNode;
562 newNode->tag.clear();
563 newNode->text.clear();
564 newNode->id = Html_unknown;
565 } else {
566 nodes.resize(asize: nodes.size() + 1);
567 newNode = &nodes.last();
568 }
569
570 newNode->parent = parent;
571 return newNode;
572}
573
574void QTextHtmlParser::parse(const QString &text, const QTextDocument *_resourceProvider)
575{
576 nodes.clear();
577 nodes.resize(asize: 1);
578 txt = text;
579 pos = 0;
580 len = txt.length();
581 textEditMode = false;
582 resourceProvider = _resourceProvider;
583 parse();
584 //dumpHtml();
585}
586
587int QTextHtmlParser::depth(int i) const
588{
589 int depth = 0;
590 while (i) {
591 i = at(i).parent;
592 ++depth;
593 }
594 return depth;
595}
596
597int QTextHtmlParser::margin(int i, int mar) const {
598 int m = 0;
599 const QTextHtmlParserNode *node;
600 if (mar == MarginLeft
601 || mar == MarginRight) {
602 while (i) {
603 node = &at(i);
604 if (!node->isBlock() && node->id != Html_table)
605 break;
606 if (node->isTableCell())
607 break;
608 m += node->margin[mar];
609 i = node->parent;
610 }
611 }
612 return m;
613}
614
615int QTextHtmlParser::topMargin(int i) const
616{
617 if (!i)
618 return 0;
619 return at(i).margin[MarginTop];
620}
621
622int QTextHtmlParser::bottomMargin(int i) const
623{
624 if (!i)
625 return 0;
626 return at(i).margin[MarginBottom];
627}
628
629void QTextHtmlParser::eatSpace()
630{
631 while (pos < len && txt.at(i: pos).isSpace() && txt.at(i: pos) != QChar::ParagraphSeparator)
632 pos++;
633}
634
635void QTextHtmlParser::parse()
636{
637 while (pos < len) {
638 QChar c = txt.at(i: pos++);
639 if (c == QLatin1Char('<')) {
640 parseTag();
641 } else if (c == QLatin1Char('&')) {
642 nodes.last().text += parseEntity();
643 } else {
644 nodes.last().text += c;
645 }
646 }
647}
648
649// parses a tag after "<"
650void QTextHtmlParser::parseTag()
651{
652 eatSpace();
653
654 // handle comments and other exclamation mark declarations
655 if (hasPrefix(c: QLatin1Char('!'))) {
656 parseExclamationTag();
657 if (nodes.last().wsm != QTextHtmlParserNode::WhiteSpacePre
658 && nodes.last().wsm != QTextHtmlParserNode::WhiteSpacePreWrap
659 && !textEditMode)
660 eatSpace();
661 return;
662 }
663
664 // if close tag just close
665 if (hasPrefix(c: QLatin1Char('/'))) {
666 if (nodes.last().id == Html_style) {
667#ifndef QT_NO_CSSPARSER
668 QCss::Parser parser(nodes.constLast().text);
669 QCss::StyleSheet sheet;
670 sheet.origin = QCss::StyleSheetOrigin_Author;
671 parser.parse(styleSheet: &sheet, nameCaseSensitivity: Qt::CaseInsensitive);
672 inlineStyleSheets.append(t: sheet);
673 resolveStyleSheetImports(sheet);
674#endif
675 }
676 parseCloseTag();
677 return;
678 }
679
680 int p = last();
681 while (p && at(i: p).tag.size() == 0)
682 p = at(i: p).parent;
683
684 QTextHtmlParserNode *node = newNode(parent: p);
685
686 // parse tag name
687 node->tag = parseWord().toLower();
688
689 const QTextHtmlElement *elem = lookupElementHelper(element: node->tag);
690 if (elem) {
691 node->id = elem->id;
692 node->displayMode = elem->displayMode;
693 } else {
694 node->id = Html_unknown;
695 }
696
697 node->attributes.clear();
698 // _need_ at least one space after the tag name, otherwise there can't be attributes
699 if (pos < len && txt.at(i: pos).isSpace())
700 node->attributes = parseAttributes();
701
702 // resolveParent() may have to change the order in the tree and
703 // insert intermediate nodes for buggy HTML, so re-initialize the 'node'
704 // pointer through the return value
705 node = resolveParent();
706 resolveNode();
707
708#ifndef QT_NO_CSSPARSER
709 const int nodeIndex = nodes.count() - 1; // this new node is always the last
710 node->applyCssDeclarations(declarations: declarationsForNode(node: nodeIndex), resourceProvider);
711#endif
712 applyAttributes(attributes: node->attributes);
713
714 // finish tag
715 bool tagClosed = false;
716 while (pos < len && txt.at(i: pos) != QLatin1Char('>')) {
717 if (txt.at(i: pos) == QLatin1Char('/'))
718 tagClosed = true;
719
720 pos++;
721 }
722 pos++;
723
724 // in a white-space preserving environment strip off a initial newline
725 // since the element itself already generates a newline
726 if ((node->wsm == QTextHtmlParserNode::WhiteSpacePre
727 || node->wsm == QTextHtmlParserNode::WhiteSpacePreWrap
728 || node->wsm == QTextHtmlParserNode::WhiteSpacePreLine)
729 && node->isBlock()) {
730 if (pos < len - 1 && txt.at(i: pos) == QLatin1Char('\n'))
731 ++pos;
732 }
733
734 if (node->mayNotHaveChildren() || tagClosed) {
735 newNode(parent: node->parent);
736 resolveNode();
737 }
738}
739
740// parses a tag beginning with "/"
741void QTextHtmlParser::parseCloseTag()
742{
743 ++pos;
744 QString tag = parseWord().toLower().trimmed();
745 while (pos < len) {
746 QChar c = txt.at(i: pos++);
747 if (c == QLatin1Char('>'))
748 break;
749 }
750
751 // find corresponding open node
752 int p = last();
753 if (p > 0
754 && at(i: p - 1).tag == tag
755 && at(i: p - 1).mayNotHaveChildren())
756 p--;
757
758 while (p && at(i: p).tag != tag)
759 p = at(i: p).parent;
760
761 // simply ignore the tag if we can't find
762 // a corresponding open node, for broken
763 // html such as <font>blah</font></font>
764 if (!p)
765 return;
766
767 // in a white-space preserving environment strip off a trailing newline
768 // since the closing of the opening block element will automatically result
769 // in a new block for elements following the <pre>
770 // ...foo\n</pre><p>blah -> foo</pre><p>blah
771 if ((at(i: p).wsm == QTextHtmlParserNode::WhiteSpacePre
772 || at(i: p).wsm == QTextHtmlParserNode::WhiteSpacePreWrap
773 || at(i: p).wsm == QTextHtmlParserNode::WhiteSpacePreLine)
774 && at(i: p).isBlock()) {
775 if (at(i: last()).text.endsWith(c: QLatin1Char('\n')))
776 nodes[last()].text.chop(n: 1);
777 }
778
779 newNode(parent: at(i: p).parent);
780 resolveNode();
781}
782
783// parses a tag beginning with "!"
784void QTextHtmlParser::parseExclamationTag()
785{
786 ++pos;
787 if (hasPrefix(c: QLatin1Char('-')) && hasPrefix(c: QLatin1Char('-'), lookahead: 1)) {
788 pos += 2;
789 // eat comments
790 int end = txt.indexOf(s: QLatin1String("-->"), from: pos);
791 pos = (end >= 0 ? end + 3 : len);
792 } else {
793 // eat internal tags
794 while (pos < len) {
795 QChar c = txt.at(i: pos++);
796 if (c == QLatin1Char('>'))
797 break;
798 }
799 }
800}
801
802// parses an entity after "&", and returns it
803QString QTextHtmlParser::parseEntity()
804{
805 const int recover = pos;
806 int entityLen = 0;
807 QStringRef entity;
808 while (pos < len) {
809 QChar c = txt.at(i: pos++);
810 if (c.isSpace() || pos - recover > 9) {
811 goto error;
812 }
813 if (c == QLatin1Char(';'))
814 break;
815 ++entityLen;
816 }
817 if (entityLen) {
818 entity = QStringRef(&txt, recover, entityLen);
819 QChar resolved = resolveEntity(entity);
820 if (!resolved.isNull())
821 return QString(resolved);
822
823 if (entityLen > 1 && entity.at(i: 0) == QLatin1Char('#')) {
824 entity = entity.mid(pos: 1); // removing leading #
825
826 int base = 10;
827 bool ok = false;
828
829 if (entity.at(i: 0).toLower() == QLatin1Char('x')) { // hex entity?
830 entity = entity.mid(pos: 1);
831 base = 16;
832 }
833
834 uint uc = entity.toUInt(ok: &ok, base);
835 if (ok) {
836 if (uc >= 0x80 && uc < 0x80 + (sizeof(windowsLatin1ExtendedCharacters)/sizeof(windowsLatin1ExtendedCharacters[0])))
837 uc = windowsLatin1ExtendedCharacters[uc - 0x80];
838 QString str;
839 if (QChar::requiresSurrogates(ucs4: uc)) {
840 str += QChar(QChar::highSurrogate(ucs4: uc));
841 str += QChar(QChar::lowSurrogate(ucs4: uc));
842 } else {
843 str = QChar(uc);
844 }
845 return str;
846 }
847 }
848 }
849error:
850 pos = recover;
851 return QLatin1String("&");
852}
853
854// parses one word, possibly quoted, and returns it
855QString QTextHtmlParser::parseWord()
856{
857 QString word;
858 if (hasPrefix(c: QLatin1Char('\"'))) { // double quotes
859 ++pos;
860 while (pos < len) {
861 QChar c = txt.at(i: pos++);
862 if (c == QLatin1Char('\"'))
863 break;
864 else if (c == QLatin1Char('&'))
865 word += parseEntity();
866 else
867 word += c;
868 }
869 } else if (hasPrefix(c: QLatin1Char('\''))) { // single quotes
870 ++pos;
871 while (pos < len) {
872 QChar c = txt.at(i: pos++);
873 // Allow for escaped single quotes as they may be part of the string
874 if (c == QLatin1Char('\'') && (txt.length() > 1 && txt.at(i: pos - 2) != QLatin1Char('\\')))
875 break;
876 else
877 word += c;
878 }
879 } else { // normal text
880 while (pos < len) {
881 QChar c = txt.at(i: pos++);
882 if (c == QLatin1Char('>')
883 || (c == QLatin1Char('/') && hasPrefix(c: QLatin1Char('>')))
884 || c == QLatin1Char('<')
885 || c == QLatin1Char('=')
886 || c.isSpace()) {
887 --pos;
888 break;
889 }
890 if (c == QLatin1Char('&'))
891 word += parseEntity();
892 else
893 word += c;
894 }
895 }
896 return word;
897}
898
899// Gives the new node (the last entry of 'nodes') the right parent and
// returns a pointer to it.
//
// Starting from the parent index already stored in the node, this
//  - synthesizes missing table/row nodes around a td, and a missing table
//    around a tr,
//  - applies the paragraph rule described in the long comment below,
//  - skips a parent of the same element type if the node is not self nesting,
//  - climbs further while the node is not allowed in the candidate parent's
//    context or the candidate may not have children.
// The resulting index is stored in node->parent and the node's index is
// appended to that parent's children list.
//
// Parent index 0 terminates every ancestor walk in this function.
900QTextHtmlParserNode *QTextHtmlParser::resolveParent()
901{
902 QTextHtmlParserNode *node = &nodes.last();
903
904 int p = node->parent;
905
906 // Excel gives us buggy HTML with just tr without surrounding table tags
907 // or with just td tags
908
 // td case: look for a tr among the ancestors
909 if (node->id == Html_td) {
910 int n = p;
911 while (n && at(i: n).id != Html_tr)
912 n = at(i: n).parent;
913
914 if (!n) {
 // no tr found: insert two default-constructed nodes right before
 // the td, so the td stays the last node
915 nodes.insert(i: nodes.count() - 1, t: QTextHtmlParserNode());
916 nodes.insert(i: nodes.count() - 1, t: QTextHtmlParserNode());
917
 // the first inserted node (third from the end) becomes the table
918 QTextHtmlParserNode *table = &nodes[nodes.count() - 3];
919 table->parent = p;
920 table->id = Html_table;
921 table->tag = QLatin1String("table");
922 table->children.append(t: nodes.count() - 2); // add row as child
923
 // the second inserted node (second from the end) becomes the row
924 QTextHtmlParserNode *row = &nodes[nodes.count() - 2];
925 row->parent = nodes.count() - 3; // table as parent
926 row->id = Html_tr;
927 row->tag = QLatin1String("tr");
928
 // the td is now parented to the synthesized row
929 p = nodes.count() - 2;
930 node = &nodes.last(); // re-initialize pointer
931 }
932 }
933
 // tr case: look for a table among the ancestors
934 if (node->id == Html_tr) {
935 int n = p;
936 while (n && at(i: n).id != Html_table)
937 n = at(i: n).parent;
938
939 if (!n) {
 // no table found: insert one right before the tr and use it as parent
940 nodes.insert(i: nodes.count() - 1, t: QTextHtmlParserNode());
941 QTextHtmlParserNode *table = &nodes[nodes.count() - 2];
942 table->parent = p;
943 table->id = Html_table;
944 table->tag = QLatin1String("table");
945 p = nodes.count() - 2;
946 node = &nodes.last(); // re-initialize pointer
947 }
948 }
949
950 // permit invalid html by letting block elements be children
951 // of inline elements with the exception of paragraphs:
952 //
953 // a new paragraph closes parent inline elements (while loop),
954 // unless they themselves are children of a non-paragraph block
955 // element (if statement)
956 //
957 // For example:
958 //
959 // <body><p><b>Foo<p>Bar <-- second <p> implicitly closes <b> that
960 // belongs to the first <p>. The self-nesting
961 // check further down prevents the second <p>
962 // from nesting into the first one then.
963 // so Bar is not bold.
964 //
965 // <body><b><p>Foo <-- Foo should be bold.
966 //
967 // <body><b><p>Foo<p>Bar <-- Foo and Bar should be bold.
968 //
969 if (node->id == Html_p) {
 // climb to the nearest block-level ancestor
970 while (p && !at(i: p).isBlock())
971 p = at(i: p).parent;
972
 // unless that ancestor is itself a paragraph, keep the original parent
973 if (!p || at(i: p).id != Html_p)
974 p = node->parent;
975 }
976
977 // some elements are not self nesting
978 if (node->id == at(i: p).id
979 && node->isNotSelfNesting())
980 p = at(i: p).parent;
981
982 // some elements are not allowed in certain contexts
 // (the loop also climbs past any candidate that may not have children)
983 while ((p && !node->allowedInContext(parentId: at(i: p).id))
984 // ### make new styles aware of empty tags
985 || at(i: p).mayNotHaveChildren()
986 ) {
987 p = at(i: p).parent;
988 }
989
990 node->parent = p;
991
992 // makes it easier to traverse the tree, later
993 nodes[p].children.append(t: nodes.count() - 1);
994 return node;
995}
996
997// sets all properties on the new node
998void QTextHtmlParser::resolveNode()
999{
1000 QTextHtmlParserNode *node = &nodes.last();
1001 const QTextHtmlParserNode *parent = &nodes.at(i: node->parent);
1002 node->initializeProperties(parent, parser: this);
1003}
1004
1005bool QTextHtmlParserNode::isNestedList(const QTextHtmlParser *parser) const
1006{
1007 if (!isListStart())
1008 return false;
1009
1010 int p = parent;
1011 while (p) {
1012 if (parser->at(i: p).isListStart())
1013 return true;
1014 p = parser->at(i: p).parent;
1015 }
1016 return false;
1017}
1018
1019void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent, const QTextHtmlParser *parser)
1020{
1021 // inherit properties from parent element
1022 charFormat = parent->charFormat;
1023
1024 if (id == Html_html)
1025 blockFormat.setLayoutDirection(Qt::LeftToRight); // HTML default
1026 else if (parent->blockFormat.hasProperty(propertyId: QTextFormat::LayoutDirection))
1027 blockFormat.setLayoutDirection(parent->blockFormat.layoutDirection());
1028
1029 if (parent->displayMode == QTextHtmlElement::DisplayNone)
1030 displayMode = QTextHtmlElement::DisplayNone;
1031
1032 if (parent->id != Html_table || id == Html_caption) {
1033 if (parent->blockFormat.hasProperty(propertyId: QTextFormat::BlockAlignment))
1034 blockFormat.setAlignment(parent->blockFormat.alignment());
1035 else
1036 blockFormat.clearProperty(propertyId: QTextFormat::BlockAlignment);
1037 }
1038 // we don't paint per-row background colors, yet. so as an
1039 // exception inherit the background color here
1040 // we also inherit the background between inline elements
1041 if ((parent->id != Html_tr || !isTableCell())
1042 && (displayMode != QTextHtmlElement::DisplayInline || parent->displayMode != QTextHtmlElement::DisplayInline)) {
1043 charFormat.clearProperty(propertyId: QTextFormat::BackgroundBrush);
1044 }
1045
1046 listStyle = parent->listStyle;
1047 // makes no sense to inherit that property, a named anchor is a single point
1048 // in the document, which is set by the DocumentFragment
1049 charFormat.clearProperty(propertyId: QTextFormat::AnchorName);
1050 wsm = parent->wsm;
1051
1052 // initialize remaining properties
1053 margin[QTextHtmlParser::MarginLeft] = 0;
1054 margin[QTextHtmlParser::MarginRight] = 0;
1055 margin[QTextHtmlParser::MarginTop] = 0;
1056 margin[QTextHtmlParser::MarginBottom] = 0;
1057 cssFloat = QTextFrameFormat::InFlow;
1058
1059 for (int i = 0; i < 4; ++i)
1060 padding[i] = -1;
1061
1062 // set element specific attributes
1063 switch (id) {
1064 case Html_a:
1065 for (int i = 0; i < attributes.count(); i += 2) {
1066 const QString key = attributes.at(i);
1067 if (key.compare(other: QLatin1String("href"), cs: Qt::CaseInsensitive) == 0
1068 && !attributes.at(i: i + 1).isEmpty()) {
1069 hasHref = true;
1070 }
1071 }
1072 charFormat.setAnchor(true);
1073 break;
1074 case Html_big:
1075 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(1));
1076 break;
1077 case Html_small:
1078 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(-1));
1079 break;
1080 case Html_h1:
1081 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(3));
1082 margin[QTextHtmlParser::MarginTop] = 18;
1083 margin[QTextHtmlParser::MarginBottom] = 12;
1084 break;
1085 case Html_h2:
1086 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(2));
1087 margin[QTextHtmlParser::MarginTop] = 16;
1088 margin[QTextHtmlParser::MarginBottom] = 12;
1089 break;
1090 case Html_h3:
1091 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(1));
1092 margin[QTextHtmlParser::MarginTop] = 14;
1093 margin[QTextHtmlParser::MarginBottom] = 12;
1094 break;
1095 case Html_h4:
1096 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(0));
1097 margin[QTextHtmlParser::MarginTop] = 12;
1098 margin[QTextHtmlParser::MarginBottom] = 12;
1099 break;
1100 case Html_h5:
1101 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: int(-1));
1102 margin[QTextHtmlParser::MarginTop] = 12;
1103 margin[QTextHtmlParser::MarginBottom] = 4;
1104 break;
1105 case Html_p:
1106 margin[QTextHtmlParser::MarginTop] = 12;
1107 margin[QTextHtmlParser::MarginBottom] = 12;
1108 break;
1109 case Html_ul:
1110 // nested lists don't have margins, except for the toplevel one
1111 if (!isNestedList(parser)) {
1112 margin[QTextHtmlParser::MarginTop] = 12;
1113 margin[QTextHtmlParser::MarginBottom] = 12;
1114 }
1115 // no left margin as we use indenting instead
1116 break;
1117 case Html_ol:
1118 // nested lists don't have margins, except for the toplevel one
1119 if (!isNestedList(parser)) {
1120 margin[QTextHtmlParser::MarginTop] = 12;
1121 margin[QTextHtmlParser::MarginBottom] = 12;
1122 }
1123 // no left margin as we use indenting instead
1124 break;
1125 case Html_br:
1126 text = QChar(QChar::LineSeparator);
1127 break;
1128 case Html_pre:
1129 margin[QTextHtmlParser::MarginTop] = 12;
1130 margin[QTextHtmlParser::MarginBottom] = 12;
1131 break;
1132 case Html_blockquote:
1133 margin[QTextHtmlParser::MarginTop] = 12;
1134 margin[QTextHtmlParser::MarginBottom] = 12;
1135 margin[QTextHtmlParser::MarginLeft] = 40;
1136 margin[QTextHtmlParser::MarginRight] = 40;
1137 blockFormat.setProperty(propertyId: QTextFormat::BlockQuoteLevel, value: 1);
1138 break;
1139 case Html_dl:
1140 margin[QTextHtmlParser::MarginTop] = 8;
1141 margin[QTextHtmlParser::MarginBottom] = 8;
1142 break;
1143 case Html_dd:
1144 margin[QTextHtmlParser::MarginLeft] = 30;
1145 break;
1146 default: break;
1147 }
1148}
1149
1150#ifndef QT_NO_CSSPARSER
1151void QTextHtmlParserNode::setListStyle(const QVector<QCss::Value> &cssValues)
1152{
1153 for (int i = 0; i < cssValues.count(); ++i) {
1154 if (cssValues.at(i).type == QCss::Value::KnownIdentifier) {
1155 switch (static_cast<QCss::KnownValue>(cssValues.at(i).variant.toInt())) {
1156 case QCss::Value_None: hasOwnListStyle = true; listStyle = QTextListFormat::ListStyleUndefined; break;
1157 case QCss::Value_Disc: hasOwnListStyle = true; listStyle = QTextListFormat::ListDisc; break;
1158 case QCss::Value_Square: hasOwnListStyle = true; listStyle = QTextListFormat::ListSquare; break;
1159 case QCss::Value_Circle: hasOwnListStyle = true; listStyle = QTextListFormat::ListCircle; break;
1160 case QCss::Value_Decimal: hasOwnListStyle = true; listStyle = QTextListFormat::ListDecimal; break;
1161 case QCss::Value_LowerAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerAlpha; break;
1162 case QCss::Value_UpperAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperAlpha; break;
1163 case QCss::Value_LowerRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerRoman; break;
1164 case QCss::Value_UpperRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperRoman; break;
1165 default: break;
1166 }
1167 }
1168 }
1169 // allow individual list items to override the style
1170 if (id == Html_li && hasOwnListStyle)
1171 blockFormat.setProperty(propertyId: QTextFormat::ListStyle, value: listStyle);
1172}
1173
// Applies the given CSS declarations to this node.
//
// The work is done in this order:
//  1. margins and paddings are extracted from the box properties;
//  2. for td/th nodes, the per-side border widths, brushes and styles are
//     extracted into the tableCell* members;
//  3. every declaration that has at least one value is dispatched on its
//     property id (see the switch below);
//  4. font and font size adjustment are extracted and applied to charFormat;
//  5. the background is extracted: a non-empty image name is passed to
//     applyBackgroundImage() if a resourceProvider is given, otherwise a
//     brush with a style other than Qt::NoBrush becomes the background.
//
// resourceProvider is only used for foreground textures and background
// images and is checked for null before each use.
1174void QTextHtmlParserNode::applyCssDeclarations(const QVector<QCss::Declaration> &declarations, const QTextDocument *resourceProvider)
1175{
1176 QCss::ValueExtractor extractor(declarations);
1177 extractor.extractBox(margins: margin, paddings: padding);
1178
1179 if (id == Html_td || id == Html_th) {
1180 QCss::BorderStyle cssStyles[4];
1181 int cssBorder[4];
1182 QSize cssRadii[4]; // unused
1183 for (int i = 0; i < 4; ++i) {
1184 cssStyles[i] = QCss::BorderStyle_None;
1185 cssBorder[i] = 0;
1186 }
1187 // this will parse (and cache) "border-width" as a list so the
1188 // QCss::BorderWidth parsing below which expects a single value
1189 // will not work as expected - which in this case does not matter
1190 // because tableBorder is not relevant for cells.
1191 extractor.extractBorder(borders: cssBorder, colors: tableCellBorderBrush, Styles: cssStyles, radii: cssRadii);
1192 for (int i = 0; i < 4; ++i) {
 // NOTE(review): the "- 1" presumably maps QCss::BorderStyle onto
 // QTextFrameFormat::BorderStyle (enums offset by one) - confirm
 // against both enum definitions
1193 tableCellBorderStyle[i] = static_cast<QTextFrameFormat::BorderStyle>(cssStyles[i] - 1);
1194 tableCellBorder[i] = static_cast<qreal>(cssBorder[i]);
1195 }
1196 }
1197
1198 for (int i = 0; i < declarations.count(); ++i) {
1199 const QCss::Declaration &decl = declarations.at(i);
 // nothing to interpret without at least one value
1200 if (decl.d->values.isEmpty()) continue;
1201
 // several cases below switch on the first value, provided it is a
 // known identifier; otherwise 'identifier' stays UnknownValue
1202 QCss::KnownValue identifier = QCss::UnknownValue;
1203 if (decl.d->values.first().type == QCss::Value::KnownIdentifier)
1204 identifier = static_cast<QCss::KnownValue>(decl.d->values.first().variant.toInt());
1205
1206 switch (decl.d->propertyId) {
1207 case QCss::BorderColor: borderBrush = QBrush(decl.colorValue()); break;
1208 case QCss::BorderStyles:
 // unknown and native styles are ignored, borderStyle keeps its value
1209 if (decl.styleValue() != QCss::BorderStyle_Unknown && decl.styleValue() != QCss::BorderStyle_Native)
1210 borderStyle = static_cast<QTextFrameFormat::BorderStyle>(decl.styleValue() - 1);
1211 break;
1212 case QCss::BorderWidth: {
1213 int borders[4];
1214 extractor.lengthValues(decl, m: borders);
 // only the first of the four lengths is used as the table border
1215 tableBorder = borders[0];
1216 }
1217 break;
1218 case QCss::BorderCollapse:
1219 borderCollapse = decl.borderCollapseValue();
1220 break;
1221 case QCss::Color: charFormat.setForeground(decl.colorValue()); break;
1222 case QCss::Float:
 // anything other than left/right resets the float to in-flow
1223 cssFloat = QTextFrameFormat::InFlow;
1224 switch (identifier) {
1225 case QCss::Value_Left: cssFloat = QTextFrameFormat::FloatLeft; break;
1226 case QCss::Value_Right: cssFloat = QTextFrameFormat::FloatRight; break;
1227 default: break;
1228 }
1229 break;
1230 case QCss::QtBlockIndent:
1231 blockFormat.setIndent(decl.d->values.first().variant.toInt());
1232 break;
1233 case QCss::QtLineHeightType: {
 // the type name is matched case-insensitively; any unrecognized name
 // falls back to SingleHeight
1234 QString lineHeightTypeName = decl.d->values.first().variant.toString();
1235 QTextBlockFormat::LineHeightTypes lineHeightType;
1236 if (lineHeightTypeName.compare(other: QLatin1String("proportional"), cs: Qt::CaseInsensitive) == 0)
1237 lineHeightType = QTextBlockFormat::ProportionalHeight;
1238 else if (lineHeightTypeName.compare(other: QLatin1String("fixed"), cs: Qt::CaseInsensitive) == 0)
1239 lineHeightType = QTextBlockFormat::FixedHeight;
1240 else if (lineHeightTypeName.compare(other: QLatin1String("minimum"), cs: Qt::CaseInsensitive) == 0)
1241 lineHeightType = QTextBlockFormat::MinimumHeight;
1242 else if (lineHeightTypeName.compare(other: QLatin1String("line-distance"), cs: Qt::CaseInsensitive) == 0)
1243 lineHeightType = QTextBlockFormat::LineDistanceHeight;
1244 else
1245 lineHeightType = QTextBlockFormat::SingleHeight;
1246
 // a line height seen earlier as a plain number was scaled by 100 in
 // the LineHeight case; undo that scaling now that an explicit type
 // is declared
1247 if (hasLineHeightMultiplier) {
1248 qreal lineHeight = blockFormat.lineHeight() / 100.0;
1249 blockFormat.setProperty(propertyId: QTextBlockFormat::LineHeight, value: lineHeight);
1250 }
1251
1252 blockFormat.setProperty(propertyId: QTextBlockFormat::LineHeightType, value: lineHeightType);
1253 hasOwnLineHeightType = true;
1254 }
1255 break;
1256 case QCss::LineHeight: {
1257 qreal lineHeight;
1258 QTextBlockFormat::LineHeightTypes lineHeightType;
 // a value in px is used as a minimum height
1259 if (decl.realValue(r: &lineHeight, unit: "px")) {
1260 lineHeightType = QTextBlockFormat::MinimumHeight;
1261 } else {
1262 bool ok;
1263 QCss::Value cssValue = decl.d->values.first();
1264 QString value = cssValue.toString();
1265 lineHeight = value.toDouble(ok: &ok);
1266 if (ok) {
 // a plain number is scaled by 100 and flagged as a multiplier,
 // unless this node already declared its own line height type
1267 if (!hasOwnLineHeightType && cssValue.type == QCss::Value::Number) {
1268 lineHeight *= 100.0;
1269 hasLineHeightMultiplier = true;
1270 }
1271 lineHeightType = QTextBlockFormat::ProportionalHeight;
1272 } else {
 // not convertible to a number: fall back to single height
1273 lineHeight = 0.0;
1274 lineHeightType = QTextBlockFormat::SingleHeight;
1275 }
1276 }
1277
1278 // Only override line height type if specified in same node
1279 if (hasOwnLineHeightType)
1280 lineHeightType = QTextBlockFormat::LineHeightTypes(blockFormat.lineHeightType());
1281
1282 blockFormat.setLineHeight(height: lineHeight, heightType: lineHeightType);
1283 break;
1284 }
1285 case QCss::TextIndent: {
1286 qreal indent = 0;
1287 if (decl.realValue(r: &indent, unit: "px"))
1288 blockFormat.setTextIndent(indent);
1289 break; }
1290 case QCss::QtListIndent:
1291 if (decl.intValue(i: &cssListIndent))
1292 hasCssListIndent = true;
1293 break;
1294 case QCss::QtParagraphType:
1295 if (decl.d->values.first().variant.toString().compare(other: QLatin1String("empty"), cs: Qt::CaseInsensitive) == 0)
1296 isEmptyParagraph = true;
1297 break;
1298 case QCss::QtTableType:
 // "frame" marks a text frame, "root" additionally marks the root frame
1299 if (decl.d->values.first().variant.toString().compare(other: QLatin1String("frame"), cs: Qt::CaseInsensitive) == 0)
1300 isTextFrame = true;
1301 else if (decl.d->values.first().variant.toString().compare(other: QLatin1String("root"), cs: Qt::CaseInsensitive) == 0) {
1302 isTextFrame = true;
1303 isRootFrame = true;
1304 }
1305 break;
1306 case QCss::QtUserState:
1307 userState = decl.d->values.first().variant.toInt();
1308 break;
1309 case QCss::Whitespace:
1310 switch (identifier) {
1311 case QCss::Value_Normal: wsm = QTextHtmlParserNode::WhiteSpaceNormal; break;
1312 case QCss::Value_Pre: wsm = QTextHtmlParserNode::WhiteSpacePre; break;
1313 case QCss::Value_NoWrap: wsm = QTextHtmlParserNode::WhiteSpaceNoWrap; break;
1314 case QCss::Value_PreWrap: wsm = QTextHtmlParserNode::WhiteSpacePreWrap; break;
1315 case QCss::Value_PreLine: wsm = QTextHtmlParserNode::WhiteSpacePreLine; break;
1316 default: break;
1317 }
1318 break;
1319 case QCss::VerticalAlignment:
 // unlike most cases here, an unrecognized value resets to AlignNormal
1320 switch (identifier) {
1321 case QCss::Value_Sub: charFormat.setVerticalAlignment(QTextCharFormat::AlignSubScript); break;
1322 case QCss::Value_Super: charFormat.setVerticalAlignment(QTextCharFormat::AlignSuperScript); break;
1323 case QCss::Value_Middle: charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle); break;
1324 case QCss::Value_Top: charFormat.setVerticalAlignment(QTextCharFormat::AlignTop); break;
1325 case QCss::Value_Bottom: charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom); break;
1326 default: charFormat.setVerticalAlignment(QTextCharFormat::AlignNormal); break;
1327 }
1328 break;
1329 case QCss::PageBreakBefore:
 // "always" sets the flag, "auto" clears it; other flags are preserved
1330 switch (identifier) {
1331 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysBefore); break;
1332 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysBefore); break;
1333 default: break;
1334 }
1335 break;
1336 case QCss::PageBreakAfter:
1337 switch (identifier) {
1338 case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysAfter); break;
1339 case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysAfter); break;
1340 default: break;
1341 }
1342 break;
1343 case QCss::TextUnderlineStyle:
1344 switch (identifier) {
1345 case QCss::Value_None: charFormat.setUnderlineStyle(QTextCharFormat::NoUnderline); break;
1346 case QCss::Value_Solid: charFormat.setUnderlineStyle(QTextCharFormat::SingleUnderline); break;
1347 case QCss::Value_Dashed: charFormat.setUnderlineStyle(QTextCharFormat::DashUnderline); break;
1348 case QCss::Value_Dotted: charFormat.setUnderlineStyle(QTextCharFormat::DotLine); break;
1349 case QCss::Value_DotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotLine); break;
1350 case QCss::Value_DotDotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotDotLine); break;
1351 case QCss::Value_Wave: charFormat.setUnderlineStyle(QTextCharFormat::WaveUnderline); break;
1352 default: break;
1353 }
1354 break;
1355 case QCss::ListStyleType:
1356 case QCss::ListStyle:
1357 setListStyle(decl.d->values);
1358 break;
1359 case QCss::QtListNumberPrefix:
1360 textListNumberPrefix = decl.d->values.first().variant.toString();
1361 break;
1362 case QCss::QtListNumberSuffix:
1363 textListNumberSuffix = decl.d->values.first().variant.toString();
1364 break;
1365 case QCss::TextAlignment:
1366 switch (identifier) {
1367 case QCss::Value_Left: blockFormat.setAlignment(Qt::AlignLeft); break;
1368 case QCss::Value_Center: blockFormat.setAlignment(Qt::AlignCenter); break;
1369 case QCss::Value_Right: blockFormat.setAlignment(Qt::AlignRight); break;
1370 default: break;
1371 }
1372 break;
1373
1374 case QCss::QtForegroundTextureCacheKey:
1375 {
 // the value is a texture cache key; it is only looked up when a
 // document with a valid docHandle() is available
1376 if (resourceProvider != nullptr && resourceProvider->docHandle() != nullptr) {
1377 bool ok;
1378 qint64 searchKey = decl.d->values.first().variant.toLongLong(ok: &ok);
1379 if (ok)
1380 applyForegroundImage(cacheKey: searchKey, resourceProvider);
1381 }
1382 break;
1383 }
1384 default: break;
1385 }
1386 }
1387
1388 QFont f;
 // -255 acts as a "not set" marker: the adjustment is only applied below
 // when it ends up >= -1 (presumably extractFont() leaves it untouched
 // when no size adjustment is declared - TODO confirm)
1389 int adjustment = -255;
1390 extractor.extractFont(font: &f, fontSizeAdjustment: &adjustment);
1391 if (f.pixelSize() > INT32_MAX / 2)
1392 f.setPixelSize(INT32_MAX / 2); // avoid even more extreme values
1393 charFormat.setFont(font: f, behavior: QTextCharFormat::FontPropertiesSpecifiedOnly);
1394
1395 if (adjustment >= -1)
1396 charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: adjustment);
1397
1398 {
 // only the brush and the image name are used; the remaining outputs of
 // extractBackground() are ignored
1399 Qt::Alignment ignoredAlignment;
1400 QCss::Repeat ignoredRepeat;
1401 QString bgImage;
1402 QBrush bgBrush;
1403 QCss::Origin ignoredOrigin, ignoredClip;
1404 QCss::Attachment ignoredAttachment;
1405 extractor.extractBackground(&bgBrush, &bgImage, &ignoredRepeat, &ignoredAlignment,
1406 &ignoredOrigin, &ignoredAttachment, &ignoredClip);
1407
 // an image (with a provider to resolve it) takes precedence over a brush
1408 if (!bgImage.isEmpty() && resourceProvider) {
1409 applyBackgroundImage(url: bgImage, resourceProvider);
1410 } else if (bgBrush.style() != Qt::NoBrush) {
1411 charFormat.setBackground(bgBrush);
1412 }
1413 }
1414}
1415
1416#endif // QT_NO_CSSPARSER
1417
1418void QTextHtmlParserNode::applyForegroundImage(qint64 searchKey, const QTextDocument *resourceProvider)
1419{
1420 QTextDocumentPrivate *priv = resourceProvider->docHandle();
1421 for (int i = 0; i < priv->formats.numFormats(); ++i) {
1422 QTextCharFormat format = priv->formats.charFormat(index: i);
1423 if (format.isValid()) {
1424 QBrush brush = format.foreground();
1425 if (brush.style() == Qt::TexturePattern) {
1426 const bool isPixmap = qHasPixmapTexture(brush);
1427
1428 if (isPixmap && QCoreApplication::instance()->thread() != QThread::currentThread()) {
1429 qWarning(msg: "Can't apply QPixmap outside of GUI thread");
1430 return;
1431 }
1432
1433 const qint64 cacheKey = isPixmap ? brush.texture().cacheKey() : brush.textureImage().cacheKey();
1434 if (cacheKey == searchKey) {
1435 QBrush b;
1436 if (isPixmap)
1437 b.setTexture(brush.texture());
1438 else
1439 b.setTextureImage(brush.textureImage());
1440 b.setStyle(Qt::TexturePattern);
1441 charFormat.setForeground(b);
1442 }
1443 }
1444 }
1445 }
1446
1447}
1448
1449void QTextHtmlParserNode::applyBackgroundImage(const QString &url, const QTextDocument *resourceProvider)
1450{
1451 if (!url.isEmpty() && resourceProvider) {
1452 QVariant val = resourceProvider->resource(type: QTextDocument::ImageResource, name: url);
1453
1454 if (QCoreApplication::instance()->thread() != QThread::currentThread()) {
1455 // must use images in non-GUI threads
1456 if (val.userType() == QMetaType::QImage) {
1457 QImage image = qvariant_cast<QImage>(v: val);
1458 charFormat.setBackground(image);
1459 } else if (val.userType() == QMetaType::QByteArray) {
1460 QImage image;
1461 if (image.loadFromData(data: val.toByteArray())) {
1462 charFormat.setBackground(image);
1463 }
1464 }
1465 } else {
1466 if (val.userType() == QMetaType::QImage || val.userType() == QMetaType::QPixmap) {
1467 charFormat.setBackground(qvariant_cast<QPixmap>(v: val));
1468 } else if (val.userType() == QMetaType::QByteArray) {
1469 QPixmap pm;
1470 if (pm.loadFromData(buf: val.toByteArray())) {
1471 charFormat.setBackground(pm);
1472 }
1473 }
1474 }
1475 }
1476 if (!url.isEmpty())
1477 charFormat.setProperty(propertyId: QTextFormat::BackgroundImageUrl, value: url);
1478}
1479
1480bool QTextHtmlParserNode::hasOnlyWhitespace() const
1481{
1482 for (int i = 0; i < text.count(); ++i)
1483 if (!text.at(i).isSpace() || text.at(i) == QChar::LineSeparator)
1484 return false;
1485 return true;
1486}
1487
1488static bool setIntAttribute(int *destination, const QString &value)
1489{
1490 bool ok = false;
1491 int val = value.toInt(ok: &ok);
1492 if (ok)
1493 *destination = val;
1494
1495 return ok;
1496}
1497
1498static bool setFloatAttribute(qreal *destination, const QString &value)
1499{
1500 bool ok = false;
1501 qreal val = value.toDouble(ok: &ok);
1502 if (ok)
1503 *destination = val;
1504
1505 return ok;
1506}
1507
1508static void setWidthAttribute(QTextLength *width, const QString &valueStr)
1509{
1510 bool ok = false;
1511 qreal realVal = valueStr.toDouble(ok: &ok);
1512 if (ok) {
1513 *width = QTextLength(QTextLength::FixedLength, realVal);
1514 } else {
1515 QStringRef value = QStringRef(&valueStr).trimmed();
1516 if (!value.isEmpty() && value.endsWith(c: QLatin1Char('%'))) {
1517 value.truncate(pos: value.size() - 1);
1518 realVal = value.toDouble(ok: &ok);
1519 if (ok)
1520 *width = QTextLength(QTextLength::PercentageLength, realVal);
1521 }
1522 }
1523}
1524
1525#ifndef QT_NO_CSSPARSER
1526void QTextHtmlParserNode::parseStyleAttribute(const QString &value, const QTextDocument *resourceProvider)
1527{
1528 const QString css = QLatin1String("* {") + value + QLatin1Char('}');
1529 QCss::Parser parser(css);
1530 QCss::StyleSheet sheet;
1531 parser.parse(styleSheet: &sheet, nameCaseSensitivity: Qt::CaseInsensitive);
1532 if (sheet.styleRules.count() != 1) return;
1533 applyCssDeclarations(declarations: sheet.styleRules.at(i: 0).declarations, resourceProvider);
1534}
1535#endif
1536
1537QStringList QTextHtmlParser::parseAttributes()
1538{
1539 QStringList attrs;
1540
1541 while (pos < len) {
1542 eatSpace();
1543 if (hasPrefix(c: QLatin1Char('>')) || hasPrefix(c: QLatin1Char('/')))
1544 break;
1545 QString key = parseWord().toLower();
1546 QString value = QLatin1String("1");
1547 if (key.size() == 0)
1548 break;
1549 eatSpace();
1550 if (hasPrefix(c: QLatin1Char('='))){
1551 pos++;
1552 eatSpace();
1553 value = parseWord();
1554 }
1555 if (value.size() == 0)
1556 continue;
1557 attrs << key << value;
1558 }
1559
1560 return attrs;
1561}
1562
1563void QTextHtmlParser::applyAttributes(const QStringList &attributes)
1564{
1565 // local state variable for qt3 textedit mode
1566 bool seenQt3Richtext = false;
1567 QString linkHref;
1568 QString linkType;
1569
1570 if (attributes.count() % 2 == 1)
1571 return;
1572
1573 QTextHtmlParserNode *node = &nodes.last();
1574
1575 for (int i = 0; i < attributes.count(); i += 2) {
1576 QString key = attributes.at(i);
1577 QString value = attributes.at(i: i + 1);
1578
1579 switch (node->id) {
1580 case Html_font:
1581 // the infamous font tag
1582 if (key == QLatin1String("size") && value.size()) {
1583 int n = value.toInt();
1584 if (value.at(i: 0) != QLatin1Char('+') && value.at(i: 0) != QLatin1Char('-'))
1585 n -= 3;
1586 node->charFormat.setProperty(propertyId: QTextFormat::FontSizeAdjustment, value: n);
1587 } else if (key == QLatin1String("face")) {
1588 if (value.contains(c: QLatin1Char(','))) {
1589 const QStringList values = value.split(sep: QLatin1Char(','));
1590 QStringList families;
1591 for (const QString &family : values)
1592 families << family.trimmed();
1593 node->charFormat.setFontFamilies(families);
1594 node->charFormat.setFontFamily(families.at(i: 0));
1595 } else {
1596 node->charFormat.setFontFamily(value);
1597 }
1598 } else if (key == QLatin1String("color")) {
1599 QColor c; c.setNamedColor(value);
1600 if (!c.isValid())
1601 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1602 node->charFormat.setForeground(c);
1603 }
1604 break;
1605 case Html_ol:
1606 case Html_ul:
1607 if (key == QLatin1String("type")) {
1608 node->hasOwnListStyle = true;
1609 if (value == QLatin1String("1")) {
1610 node->listStyle = QTextListFormat::ListDecimal;
1611 } else if (value == QLatin1String("a")) {
1612 node->listStyle = QTextListFormat::ListLowerAlpha;
1613 } else if (value == QLatin1String("A")) {
1614 node->listStyle = QTextListFormat::ListUpperAlpha;
1615 } else if (value == QLatin1String("i")) {
1616 node->listStyle = QTextListFormat::ListLowerRoman;
1617 } else if (value == QLatin1String("I")) {
1618 node->listStyle = QTextListFormat::ListUpperRoman;
1619 } else {
1620 value = std::move(value).toLower();
1621 if (value == QLatin1String("square"))
1622 node->listStyle = QTextListFormat::ListSquare;
1623 else if (value == QLatin1String("disc"))
1624 node->listStyle = QTextListFormat::ListDisc;
1625 else if (value == QLatin1String("circle"))
1626 node->listStyle = QTextListFormat::ListCircle;
1627 else if (value == QLatin1String("none"))
1628 node->listStyle = QTextListFormat::ListStyleUndefined;
1629 }
1630 }
1631 break;
1632 case Html_a:
1633 if (key == QLatin1String("href"))
1634 node->charFormat.setAnchorHref(value);
1635 else if (key == QLatin1String("name"))
1636 node->charFormat.setAnchorNames({value});
1637 break;
1638 case Html_img:
1639 if (key == QLatin1String("src") || key == QLatin1String("source")) {
1640 node->imageName = value;
1641 } else if (key == QLatin1String("width")) {
1642 node->imageWidth = -2; // register that there is a value for it.
1643 setFloatAttribute(destination: &node->imageWidth, value);
1644 } else if (key == QLatin1String("height")) {
1645 node->imageHeight = -2; // register that there is a value for it.
1646 setFloatAttribute(destination: &node->imageHeight, value);
1647 } else if (key == QLatin1String("alt")) {
1648 node->imageAlt = value;
1649 } else if (key == QLatin1String("title")) {
1650 node->text = value;
1651 }
1652 break;
1653 case Html_tr:
1654 case Html_body:
1655 if (key == QLatin1String("bgcolor")) {
1656 QColor c; c.setNamedColor(value);
1657 if (!c.isValid())
1658 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1659 node->charFormat.setBackground(c);
1660 } else if (key == QLatin1String("background")) {
1661 node->applyBackgroundImage(url: value, resourceProvider);
1662 }
1663 break;
1664 case Html_th:
1665 case Html_td:
1666 if (key == QLatin1String("width")) {
1667 setWidthAttribute(width: &node->width, valueStr: value);
1668 } else if (key == QLatin1String("bgcolor")) {
1669 QColor c; c.setNamedColor(value);
1670 if (!c.isValid())
1671 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1672 node->charFormat.setBackground(c);
1673 } else if (key == QLatin1String("background")) {
1674 node->applyBackgroundImage(url: value, resourceProvider);
1675 } else if (key == QLatin1String("rowspan")) {
1676 if (setIntAttribute(destination: &node->tableCellRowSpan, value))
1677 node->tableCellRowSpan = qMax(a: 1, b: node->tableCellRowSpan);
1678 } else if (key == QLatin1String("colspan")) {
1679 if (setIntAttribute(destination: &node->tableCellColSpan, value))
1680 node->tableCellColSpan = qBound(min: 1, val: node->tableCellColSpan, max: 20480);
1681 }
1682 break;
1683 case Html_table:
1684 if (key == QLatin1String("border")) {
1685 setFloatAttribute(destination: &node->tableBorder, value);
1686 } else if (key == QLatin1String("bgcolor")) {
1687 QColor c; c.setNamedColor(value);
1688 if (!c.isValid())
1689 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1690 node->charFormat.setBackground(c);
1691 } else if (key == QLatin1String("bordercolor")) {
1692 QColor c; c.setNamedColor(value);
1693 if (!c.isValid())
1694 qWarning(msg: "QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
1695 node->borderBrush = c;
1696 } else if (key == QLatin1String("background")) {
1697 node->applyBackgroundImage(url: value, resourceProvider);
1698 } else if (key == QLatin1String("cellspacing")) {
1699 setFloatAttribute(destination: &node->tableCellSpacing, value);
1700 } else if (key == QLatin1String("cellpadding")) {
1701 setFloatAttribute(destination: &node->tableCellPadding, value);
1702 } else if (key == QLatin1String("width")) {
1703 setWidthAttribute(width: &node->width, valueStr: value);
1704 } else if (key == QLatin1String("height")) {
1705 setWidthAttribute(width: &node->height, valueStr: value);
1706 }
1707 break;
1708 case Html_meta:
1709 if (key == QLatin1String("name")
1710 && value == QLatin1String("qrichtext")) {
1711 seenQt3Richtext = true;
1712 }
1713
1714 if (key == QLatin1String("content")
1715 && value == QLatin1String("1")
1716 && seenQt3Richtext) {
1717
1718 textEditMode = true;
1719 }
1720 break;
1721 case Html_hr:
1722 if (key == QLatin1String("width"))
1723 setWidthAttribute(width: &node->width, valueStr: value);
1724 break;
1725 case Html_link:
1726 if (key == QLatin1String("href"))
1727 linkHref = value;
1728 else if (key == QLatin1String("type"))
1729 linkType = value;
1730 break;
1731 case Html_pre:
1732 if (key == QLatin1String("class") && value.startsWith(s: QLatin1String("language-")))
1733 node->blockFormat.setProperty(propertyId: QTextFormat::BlockCodeLanguage, value: value.mid(position: 9));
1734 break;
1735 default:
1736 break;
1737 }
1738
1739 if (key == QLatin1String("style")) {
1740#ifndef QT_NO_CSSPARSER
1741 node->parseStyleAttribute(value, resourceProvider);
1742#endif
1743 } else if (key == QLatin1String("align")) {
1744 value = std::move(value).toLower();
1745 bool alignmentSet = true;
1746
1747 if (value == QLatin1String("left"))
1748 node->blockFormat.setAlignment(Qt::AlignLeft|Qt::AlignAbsolute);
1749 else if (value == QLatin1String("right"))
1750 node->blockFormat.setAlignment(Qt::AlignRight|Qt::AlignAbsolute);
1751 else if (value == QLatin1String("center"))
1752 node->blockFormat.setAlignment(Qt::AlignHCenter);
1753 else if (value == QLatin1String("justify"))
1754 node->blockFormat.setAlignment(Qt::AlignJustify);
1755 else
1756 alignmentSet = false;
1757
1758 if (node->id == Html_img) {
1759 // HTML4 compat
1760 if (alignmentSet) {
1761 if (node->blockFormat.alignment() & Qt::AlignLeft)
1762 node->cssFloat = QTextFrameFormat::FloatLeft;
1763 else if (node->blockFormat.alignment() & Qt::AlignRight)
1764 node->cssFloat = QTextFrameFormat::FloatRight;
1765 } else if (value == QLatin1String("middle")) {
1766 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1767 } else if (value == QLatin1String("top")) {
1768 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1769 }
1770 }
1771 } else if (key == QLatin1String("valign")) {
1772 value = std::move(value).toLower();
1773 if (value == QLatin1String("top"))
1774 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
1775 else if (value == QLatin1String("middle"))
1776 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
1777 else if (value == QLatin1String("bottom"))
1778 node->charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom);
1779 } else if (key == QLatin1String("dir")) {
1780 value = std::move(value).toLower();
1781 if (value == QLatin1String("ltr"))
1782 node->blockFormat.setLayoutDirection(Qt::LeftToRight);
1783 else if (value == QLatin1String("rtl"))
1784 node->blockFormat.setLayoutDirection(Qt::RightToLeft);
1785 } else if (key == QLatin1String("title")) {
1786 node->charFormat.setToolTip(value);
1787 } else if (key == QLatin1String("id")) {
1788 node->charFormat.setAnchor(true);
1789 node->charFormat.setAnchorNames({value});
1790 }
1791 }
1792
1793#ifndef QT_NO_CSSPARSER
1794 if (resourceProvider && !linkHref.isEmpty() && linkType == QLatin1String("text/css"))
1795 importStyleSheet(href: linkHref);
1796#endif
1797}
1798
1799#ifndef QT_NO_CSSPARSER
1800class QTextHtmlStyleSelector : public QCss::StyleSelector
1801{
1802public:
1803 inline QTextHtmlStyleSelector(const QTextHtmlParser *parser)
1804 : parser(parser) { nameCaseSensitivity = Qt::CaseInsensitive; }
1805
1806 virtual QStringList nodeNames(NodePtr node) const override;
1807 virtual QString attribute(NodePtr node, const QString &name) const override;
1808 virtual bool hasAttributes(NodePtr node) const override;
1809 virtual bool isNullNode(NodePtr node) const override;
1810 virtual NodePtr parentNode(NodePtr node) const override;
1811 virtual NodePtr previousSiblingNode(NodePtr node) const override;
1812 virtual NodePtr duplicateNode(NodePtr node) const override;
1813 virtual void freeNode(NodePtr node) const override;
1814
1815private:
1816 const QTextHtmlParser *parser;
1817};
1818
1819QStringList QTextHtmlStyleSelector::nodeNames(NodePtr node) const
1820{
1821 return QStringList(parser->at(i: node.id).tag.toLower());
1822}
1823
1824#endif // QT_NO_CSSPARSER
1825
1826#ifndef QT_NO_CSSPARSER
1827
1828static inline int findAttribute(const QStringList &attributes, const QString &name)
1829{
1830 int idx = -1;
1831 do {
1832 idx = attributes.indexOf(t: name, from: idx + 1);
1833 } while (idx != -1 && (idx % 2 == 1));
1834 return idx;
1835}
1836
1837QString QTextHtmlStyleSelector::attribute(NodePtr node, const QString &name) const
1838{
1839 const QStringList &attributes = parser->at(i: node.id).attributes;
1840 const int idx = findAttribute(attributes, name);
1841 if (idx == -1)
1842 return QString();
1843 return attributes.at(i: idx + 1);
1844}
1845
1846bool QTextHtmlStyleSelector::hasAttributes(NodePtr node) const
1847{
1848 const QStringList &attributes = parser->at(i: node.id).attributes;
1849 return !attributes.isEmpty();
1850}
1851
1852bool QTextHtmlStyleSelector::isNullNode(NodePtr node) const
1853{
1854 return node.id == 0;
1855}
1856
1857QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::parentNode(NodePtr node) const
1858{
1859 NodePtr parent;
1860 parent.id = 0;
1861 if (node.id) {
1862 parent.id = parser->at(i: node.id).parent;
1863 }
1864 return parent;
1865}
1866
1867QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::duplicateNode(NodePtr node) const
1868{
1869 return node;
1870}
1871
1872QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::previousSiblingNode(NodePtr node) const
1873{
1874 NodePtr sibling;
1875 sibling.id = 0;
1876 if (!node.id)
1877 return sibling;
1878 int parent = parser->at(i: node.id).parent;
1879 if (!parent)
1880 return sibling;
1881 const int childIdx = parser->at(i: parent).children.indexOf(t: node.id);
1882 if (childIdx <= 0)
1883 return sibling;
1884 sibling.id = parser->at(i: parent).children.at(i: childIdx - 1);
1885 return sibling;
1886}
1887
1888void QTextHtmlStyleSelector::freeNode(NodePtr) const
1889{
1890}
1891
1892void QTextHtmlParser::resolveStyleSheetImports(const QCss::StyleSheet &sheet)
1893{
1894 for (int i = 0; i < sheet.importRules.count(); ++i) {
1895 const QCss::ImportRule &rule = sheet.importRules.at(i);
1896 if (rule.media.isEmpty()
1897 || rule.media.contains(str: QLatin1String("screen"), cs: Qt::CaseInsensitive))
1898 importStyleSheet(href: rule.href);
1899 }
1900}
1901
1902void QTextHtmlParser::importStyleSheet(const QString &href)
1903{
1904 if (!resourceProvider)
1905 return;
1906 for (int i = 0; i < externalStyleSheets.count(); ++i)
1907 if (externalStyleSheets.at(i).url == href)
1908 return;
1909
1910 QVariant res = resourceProvider->resource(type: QTextDocument::StyleSheetResource, name: href);
1911 QString css;
1912 if (res.userType() == QMetaType::QString) {
1913 css = res.toString();
1914 } else if (res.userType() == QMetaType::QByteArray) {
1915 // #### detect @charset
1916 css = QString::fromUtf8(str: res.toByteArray());
1917 }
1918 if (!css.isEmpty()) {
1919 QCss::Parser parser(css);
1920 QCss::StyleSheet sheet;
1921 parser.parse(styleSheet: &sheet, nameCaseSensitivity: Qt::CaseInsensitive);
1922 externalStyleSheets.append(t: ExternalStyleSheet(href, sheet));
1923 resolveStyleSheetImports(sheet);
1924 }
1925}
1926
1927QVector<QCss::Declaration> standardDeclarationForNode(const QTextHtmlParserNode &node)
1928{
1929 QVector<QCss::Declaration> decls;
1930 QCss::Declaration decl;
1931 QCss::Value val;
1932 switch (node.id) {
1933 case Html_a:
1934 case Html_u: {
1935 bool needsUnderline = (node.id == Html_u) ? true : false;
1936 if (node.id == Html_a) {
1937 for (int i = 0; i < node.attributes.count(); i += 2) {
1938 const QString key = node.attributes.at(i);
1939 if (key.compare(other: QLatin1String("href"), cs: Qt::CaseInsensitive) == 0
1940 && !node.attributes.at(i: i + 1).isEmpty()) {
1941 needsUnderline = true;
1942 decl.d->property = QLatin1String("color");
1943 decl.d->propertyId = QCss::Color;
1944 val.type = QCss::Value::Color;
1945 val.variant = QVariant(QGuiApplication::palette().link());
1946 decl.d->values = QVector<QCss::Value>() << val;
1947 decl.d->inheritable = true;
1948 decls << decl;
1949 break;
1950 }
1951 }
1952 }
1953 if (needsUnderline) {
1954 decl = QCss::Declaration();
1955 decl.d->property = QLatin1String("text-decoration");
1956 decl.d->propertyId = QCss::TextDecoration;
1957 val.type = QCss::Value::KnownIdentifier;
1958 val.variant = QVariant(QCss::Value_Underline);
1959 decl.d->values = QVector<QCss::Value>() << val;
1960 decl.d->inheritable = true;
1961 decls << decl;
1962 }
1963 break;
1964 }
1965 case Html_b:
1966 case Html_strong:
1967 case Html_h1:
1968 case Html_h2:
1969 case Html_h3:
1970 case Html_h4:
1971 case Html_h5:
1972 case Html_th:
1973 decl = QCss::Declaration();
1974 decl.d->property = QLatin1String("font-weight");
1975 decl.d->propertyId = QCss::FontWeight;
1976 val.type = QCss::Value::KnownIdentifier;
1977 val.variant = QVariant(QCss::Value_Bold);
1978 decl.d->values = QVector<QCss::Value>() << val;
1979 decl.d->inheritable = true;
1980 decls << decl;
1981 if (node.id == Html_b || node.id == Html_strong)
1982 break;
1983 Q_FALLTHROUGH();
1984 case Html_big:
1985 case Html_small:
1986 if (node.id != Html_th) {
1987 decl = QCss::Declaration();
1988 decl.d->property = QLatin1String("font-size");
1989 decl.d->propertyId = QCss::FontSize;
1990 decl.d->inheritable = false;
1991 val.type = QCss::Value::KnownIdentifier;
1992 switch (node.id) {
1993 case Html_h1: val.variant = QVariant(QCss::Value_XXLarge); break;
1994 case Html_h2: val.variant = QVariant(QCss::Value_XLarge); break;
1995 case Html_h3: case Html_big: val.variant = QVariant(QCss::Value_Large); break;
1996 case Html_h4: val.variant = QVariant(QCss::Value_Medium); break;
1997 case Html_h5: case Html_small: val.variant = QVariant(QCss::Value_Small); break;
1998 default: break;
1999 }
2000 decl.d->values = QVector<QCss::Value>() << val;
2001 decls << decl;
2002 break;
2003 }
2004 Q_FALLTHROUGH();
2005 case Html_center:
2006 case Html_td:
2007 decl = QCss::Declaration();
2008 decl.d->property = QLatin1String("text-align");
2009 decl.d->propertyId = QCss::TextAlignment;
2010 val.type = QCss::Value::KnownIdentifier;
2011 val.variant = (node.id == Html_td) ? QVariant(QCss::Value_Left) : QVariant(QCss::Value_Center);
2012 decl.d->values = QVector<QCss::Value>() << val;
2013 decl.d->inheritable = true;
2014 decls << decl;
2015 break;
2016 case Html_s:
2017 decl = QCss::Declaration();
2018 decl.d->property = QLatin1String("text-decoration");
2019 decl.d->propertyId = QCss::TextDecoration;
2020 val.type = QCss::Value::KnownIdentifier;
2021 val.variant = QVariant(QCss::Value_LineThrough);
2022 decl.d->values = QVector<QCss::Value>() << val;
2023 decl.d->inheritable = true;
2024 decls << decl;
2025 break;
2026 case Html_em:
2027 case Html_i:
2028 case Html_cite:
2029 case Html_address:
2030 case Html_var:
2031 case Html_dfn:
2032 decl = QCss::Declaration();
2033 decl.d->property = QLatin1String("font-style");
2034 decl.d->propertyId = QCss::FontStyle;
2035 val.type = QCss::Value::KnownIdentifier;
2036 val.variant = QVariant(QCss::Value_Italic);
2037 decl.d->values = QVector<QCss::Value>() << val;
2038 decl.d->inheritable = true;
2039 decls << decl;
2040 break;
2041 case Html_sub:
2042 case Html_sup:
2043 decl = QCss::Declaration();
2044 decl.d->property = QLatin1String("vertical-align");
2045 decl.d->propertyId = QCss::VerticalAlignment;
2046 val.type = QCss::Value::KnownIdentifier;
2047 val.variant = (node.id == Html_sub) ? QVariant(QCss::Value_Sub) : QVariant(QCss::Value_Super);
2048 decl.d->values = QVector<QCss::Value>() << val;
2049 decl.d->inheritable = true;
2050 decls << decl;
2051 break;
2052 case Html_ul:
2053 case Html_ol:
2054 decl = QCss::Declaration();
2055 decl.d->property = QLatin1String("list-style");
2056 decl.d->propertyId = QCss::ListStyle;
2057 val.type = QCss::Value::KnownIdentifier;
2058 val.variant = (node.id == Html_ul) ? QVariant(QCss::Value_Disc) : QVariant(QCss::Value_Decimal);
2059 decl.d->values = QVector<QCss::Value>() << val;
2060 decl.d->inheritable = true;
2061 decls << decl;
2062 break;
2063 case Html_code:
2064 case Html_tt:
2065 case Html_kbd:
2066 case Html_samp:
2067 case Html_pre: {
2068 decl = QCss::Declaration();
2069 decl.d->property = QLatin1String("font-family");
2070 decl.d->propertyId = QCss::FontFamily;
2071 QVector<QCss::Value> values;
2072 val.type = QCss::Value::String;
2073 val.variant = QFontDatabase::systemFont(type: QFontDatabase::FixedFont).family();
2074 values << val;
2075 decl.d->values = values;
2076 decl.d->inheritable = true;
2077 decls << decl;
2078 }
2079 if (node.id != Html_pre)
2080 break;
2081 Q_FALLTHROUGH();
2082 case Html_br:
2083 case Html_nobr:
2084 decl = QCss::Declaration();
2085 decl.d->property = QLatin1String("whitespace");
2086 decl.d->propertyId = QCss::Whitespace;
2087 val.type = QCss::Value::KnownIdentifier;
2088 switch (node.id) {
2089 case Html_br: val.variant = QVariant(QCss::Value_PreWrap); break;
2090 case Html_nobr: val.variant = QVariant(QCss::Value_NoWrap); break;
2091 case Html_pre: val.variant = QVariant(QCss::Value_Pre); break;
2092 default: break;
2093 }
2094 decl.d->values = QVector<QCss::Value>() << val;
2095 decl.d->inheritable = true;
2096 decls << decl;
2097 break;
2098 default:
2099 break;
2100 }
2101 return decls;
2102}
2103
2104QVector<QCss::Declaration> QTextHtmlParser::declarationsForNode(int node) const
2105{
2106 QVector<QCss::Declaration> decls;
2107
2108 QTextHtmlStyleSelector selector(this);
2109
2110 int idx = 0;
2111 selector.styleSheets.resize(asize: (resourceProvider ? 1 : 0)
2112 + externalStyleSheets.count()
2113 + inlineStyleSheets.count());
2114 if (resourceProvider)
2115 selector.styleSheets[idx++] = resourceProvider->docHandle()->parsedDefaultStyleSheet;
2116
2117 for (int i = 0; i < externalStyleSheets.count(); ++i, ++idx)
2118 selector.styleSheets[idx] = externalStyleSheets.at(i).sheet;
2119
2120 for (int i = 0; i < inlineStyleSheets.count(); ++i, ++idx)
2121 selector.styleSheets[idx] = inlineStyleSheets.at(i);
2122
2123 selector.medium = QLatin1String("screen");
2124
2125 QCss::StyleSelector::NodePtr n;
2126 n.id = node;
2127
2128 const char *extraPseudo = nullptr;
2129 if (nodes.at(i: node).id == Html_a && nodes.at(i: node).hasHref)
2130 extraPseudo = "link";
2131 // Ensure that our own style is taken into consideration
2132 decls = standardDeclarationForNode(node: nodes.at(i: node));
2133 decls += selector.declarationsForNode(node: n, extraPseudo);
2134 n = selector.parentNode(node: n);
2135 while (!selector.isNullNode(node: n)) {
2136 QVector<QCss::Declaration> inheritedDecls;
2137 inheritedDecls = selector.declarationsForNode(node: n, extraPseudo);
2138 for (int i = 0; i < inheritedDecls.size(); ++i) {
2139 const QCss::Declaration &decl = inheritedDecls.at(i);
2140 if (decl.d->inheritable)
2141 decls.prepend(t: decl);
2142 }
2143 n = selector.parentNode(node: n);
2144 }
2145 return decls;
2146}
2147
2148bool QTextHtmlParser::nodeIsChildOf(int i, QTextHTMLElements id) const
2149{
2150 while (i) {
2151 if (at(i).id == id)
2152 return true;
2153 i = at(i).parent;
2154 }
2155 return false;
2156}
2157
2158QT_END_NAMESPACE
2159#endif // QT_NO_CSSPARSER
2160
2161#endif // QT_NO_TEXTHTMLPARSER
2162

source code of qtbase/src/gui/text/qtexthtmlparser.cpp