1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "qtexthtmlparser_p.h"
41
42#include <qbytearray.h>
43#include <qstack.h>
44#include <qdebug.h>
45#include <qthread.h>
46#include <qguiapplication.h>
47
48#include "qtextdocument.h"
49#include "qtextformat_p.h"
50#include "qtextdocument_p.h"
51#include "qtextcursor.h"
52#include "qfont_p.h"
53
54#include <algorithm>
55
56#ifndef QT_NO_TEXTHTMLPARSER
57
58QT_BEGIN_NAMESPACE
59
60// see also tst_qtextdocumentfragment.cpp
61#define MAX_ENTITY 258
62static const struct QTextHtmlEntity { const char name[9]; quint16 code; } entities[]= {
63 { "AElig", 0x00c6 },
64 { "AMP", 38 },
65 { "Aacute", 0x00c1 },
66 { "Acirc", 0x00c2 },
67 { "Agrave", 0x00c0 },
68 { "Alpha", 0x0391 },
69 { "Aring", 0x00c5 },
70 { "Atilde", 0x00c3 },
71 { "Auml", 0x00c4 },
72 { "Beta", 0x0392 },
73 { "Ccedil", 0x00c7 },
74 { "Chi", 0x03a7 },
75 { "Dagger", 0x2021 },
76 { "Delta", 0x0394 },
77 { "ETH", 0x00d0 },
78 { "Eacute", 0x00c9 },
79 { "Ecirc", 0x00ca },
80 { "Egrave", 0x00c8 },
81 { "Epsilon", 0x0395 },
82 { "Eta", 0x0397 },
83 { "Euml", 0x00cb },
84 { "GT", 62 },
85 { "Gamma", 0x0393 },
86 { "Iacute", 0x00cd },
87 { "Icirc", 0x00ce },
88 { "Igrave", 0x00cc },
89 { "Iota", 0x0399 },
90 { "Iuml", 0x00cf },
91 { "Kappa", 0x039a },
92 { "LT", 60 },
93 { "Lambda", 0x039b },
94 { "Mu", 0x039c },
95 { "Ntilde", 0x00d1 },
96 { "Nu", 0x039d },
97 { "OElig", 0x0152 },
98 { "Oacute", 0x00d3 },
99 { "Ocirc", 0x00d4 },
100 { "Ograve", 0x00d2 },
101 { "Omega", 0x03a9 },
102 { "Omicron", 0x039f },
103 { "Oslash", 0x00d8 },
104 { "Otilde", 0x00d5 },
105 { "Ouml", 0x00d6 },
106 { "Phi", 0x03a6 },
107 { "Pi", 0x03a0 },
108 { "Prime", 0x2033 },
109 { "Psi", 0x03a8 },
110 { "QUOT", 34 },
111 { "Rho", 0x03a1 },
112 { "Scaron", 0x0160 },
113 { "Sigma", 0x03a3 },
114 { "THORN", 0x00de },
115 { "Tau", 0x03a4 },
116 { "Theta", 0x0398 },
117 { "Uacute", 0x00da },
118 { "Ucirc", 0x00db },
119 { "Ugrave", 0x00d9 },
120 { "Upsilon", 0x03a5 },
121 { "Uuml", 0x00dc },
122 { "Xi", 0x039e },
123 { "Yacute", 0x00dd },
124 { "Yuml", 0x0178 },
125 { "Zeta", 0x0396 },
126 { "aacute", 0x00e1 },
127 { "acirc", 0x00e2 },
128 { "acute", 0x00b4 },
129 { "aelig", 0x00e6 },
130 { "agrave", 0x00e0 },
131 { "alefsym", 0x2135 },
132 { "alpha", 0x03b1 },
133 { "amp", 38 },
134 { "and", 0x22a5 },
135 { "ang", 0x2220 },
136 { "apos", 0x0027 },
137 { "aring", 0x00e5 },
138 { "asymp", 0x2248 },
139 { "atilde", 0x00e3 },
140 { "auml", 0x00e4 },
141 { "bdquo", 0x201e },
142 { "beta", 0x03b2 },
143 { "brvbar", 0x00a6 },
144 { "bull", 0x2022 },
145 { "cap", 0x2229 },
146 { "ccedil", 0x00e7 },
147 { "cedil", 0x00b8 },
148 { "cent", 0x00a2 },
149 { "chi", 0x03c7 },
150 { "circ", 0x02c6 },
151 { "clubs", 0x2663 },
152 { "cong", 0x2245 },
153 { "copy", 0x00a9 },
154 { "crarr", 0x21b5 },
155 { "cup", 0x222a },
156 { "curren", 0x00a4 },
157 { "dArr", 0x21d3 },
158 { "dagger", 0x2020 },
159 { "darr", 0x2193 },
160 { "deg", 0x00b0 },
161 { "delta", 0x03b4 },
162 { "diams", 0x2666 },
163 { "divide", 0x00f7 },
164 { "eacute", 0x00e9 },
165 { "ecirc", 0x00ea },
166 { "egrave", 0x00e8 },
167 { "empty", 0x2205 },
168 { "emsp", 0x2003 },
169 { "ensp", 0x2002 },
170 { "epsilon", 0x03b5 },
171 { "equiv", 0x2261 },
172 { "eta", 0x03b7 },
173 { "eth", 0x00f0 },
174 { "euml", 0x00eb },
175 { "euro", 0x20ac },
176 { "exist", 0x2203 },
177 { "fnof", 0x0192 },
178 { "forall", 0x2200 },
179 { "frac12", 0x00bd },
180 { "frac14", 0x00bc },
181 { "frac34", 0x00be },
182 { "frasl", 0x2044 },
183 { "gamma", 0x03b3 },
184 { "ge", 0x2265 },
185 { "gt", 62 },
186 { "hArr", 0x21d4 },
187 { "harr", 0x2194 },
188 { "hearts", 0x2665 },
189 { "hellip", 0x2026 },
190 { "iacute", 0x00ed },
191 { "icirc", 0x00ee },
192 { "iexcl", 0x00a1 },
193 { "igrave", 0x00ec },
194 { "image", 0x2111 },
195 { "infin", 0x221e },
196 { "int", 0x222b },
197 { "iota", 0x03b9 },
198 { "iquest", 0x00bf },
199 { "isin", 0x2208 },
200 { "iuml", 0x00ef },
201 { "kappa", 0x03ba },
202 { "lArr", 0x21d0 },
203 { "lambda", 0x03bb },
204 { "lang", 0x2329 },
205 { "laquo", 0x00ab },
206 { "larr", 0x2190 },
207 { "lceil", 0x2308 },
208 { "ldquo", 0x201c },
209 { "le", 0x2264 },
210 { "lfloor", 0x230a },
211 { "lowast", 0x2217 },
212 { "loz", 0x25ca },
213 { "lrm", 0x200e },
214 { "lsaquo", 0x2039 },
215 { "lsquo", 0x2018 },
216 { "lt", 60 },
217 { "macr", 0x00af },
218 { "mdash", 0x2014 },
219 { "micro", 0x00b5 },
220 { "middot", 0x00b7 },
221 { "minus", 0x2212 },
222 { "mu", 0x03bc },
223 { "nabla", 0x2207 },
224 { "nbsp", 0x00a0 },
225 { "ndash", 0x2013 },
226 { "ne", 0x2260 },
227 { "ni", 0x220b },
228 { "not", 0x00ac },
229 { "notin", 0x2209 },
230 { "nsub", 0x2284 },
231 { "ntilde", 0x00f1 },
232 { "nu", 0x03bd },
233 { "oacute", 0x00f3 },
234 { "ocirc", 0x00f4 },
235 { "oelig", 0x0153 },
236 { "ograve", 0x00f2 },
237 { "oline", 0x203e },
238 { "omega", 0x03c9 },
239 { "omicron", 0x03bf },
240 { "oplus", 0x2295 },
241 { "or", 0x22a6 },
242 { "ordf", 0x00aa },
243 { "ordm", 0x00ba },
244 { "oslash", 0x00f8 },
245 { "otilde", 0x00f5 },
246 { "otimes", 0x2297 },
247 { "ouml", 0x00f6 },
248 { "para", 0x00b6 },
249 { "part", 0x2202 },
250 { "percnt", 0x0025 },
251 { "permil", 0x2030 },
252 { "perp", 0x22a5 },
253 { "phi", 0x03c6 },
254 { "pi", 0x03c0 },
255 { "piv", 0x03d6 },
256 { "plusmn", 0x00b1 },
257 { "pound", 0x00a3 },
258 { "prime", 0x2032 },
259 { "prod", 0x220f },
260 { "prop", 0x221d },
261 { "psi", 0x03c8 },
262 { "quot", 34 },
263 { "rArr", 0x21d2 },
264 { "radic", 0x221a },
265 { "rang", 0x232a },
266 { "raquo", 0x00bb },
267 { "rarr", 0x2192 },
268 { "rceil", 0x2309 },
269 { "rdquo", 0x201d },
270 { "real", 0x211c },
271 { "reg", 0x00ae },
272 { "rfloor", 0x230b },
273 { "rho", 0x03c1 },
274 { "rlm", 0x200f },
275 { "rsaquo", 0x203a },
276 { "rsquo", 0x2019 },
277 { "sbquo", 0x201a },
278 { "scaron", 0x0161 },
279 { "sdot", 0x22c5 },
280 { "sect", 0x00a7 },
281 { "shy", 0x00ad },
282 { "sigma", 0x03c3 },
283 { "sigmaf", 0x03c2 },
284 { "sim", 0x223c },
285 { "spades", 0x2660 },
286 { "sub", 0x2282 },
287 { "sube", 0x2286 },
288 { "sum", 0x2211 },
289 { "sup", 0x2283 },
290 { "sup1", 0x00b9 },
291 { "sup2", 0x00b2 },
292 { "sup3", 0x00b3 },
293 { "supe", 0x2287 },
294 { "szlig", 0x00df },
295 { "tau", 0x03c4 },
296 { "there4", 0x2234 },
297 { "theta", 0x03b8 },
298 { "thetasym", 0x03d1 },
299 { "thinsp", 0x2009 },
300 { "thorn", 0x00fe },
301 { "tilde", 0x02dc },
302 { "times", 0x00d7 },
303 { "trade", 0x2122 },
304 { "uArr", 0x21d1 },
305 { "uacute", 0x00fa },
306 { "uarr", 0x2191 },
307 { "ucirc", 0x00fb },
308 { "ugrave", 0x00f9 },
309 { "uml", 0x00a8 },
310 { "upsih", 0x03d2 },
311 { "upsilon", 0x03c5 },
312 { "uuml", 0x00fc },
313 { "weierp", 0x2118 },
314 { "xi", 0x03be },
315 { "yacute", 0x00fd },
316 { "yen", 0x00a5 },
317 { "yuml", 0x00ff },
318 { "zeta", 0x03b6 },
319 { "zwj", 0x200d },
320 { "zwnj", 0x200c }
321};
322Q_STATIC_ASSERT(MAX_ENTITY == sizeof entities / sizeof *entities);
323
#if defined(Q_CC_MSVC) && _MSC_VER < 1600
// Orders two table rows by entity name. Only compiled for MSVC releases
// older than _MSC_VER 1600 (presumably because that compiler's
// std::lower_bound also compares elements with each other -- TODO confirm).
bool operator<(const QTextHtmlEntity &entity1, const QTextHtmlEntity &entity2)
{
    const QLatin1String lhsName(entity1.name);
    const QLatin1String rhsName(entity2.name);
    return lhsName < rhsName;
}
#endif
330
331static bool operator<(const QStringRef &entityStr, const QTextHtmlEntity &entity)
332{
333 return entityStr < QLatin1String(entity.name);
334}
335
336static bool operator<(const QTextHtmlEntity &entity, const QStringRef &entityStr)
337{
338 return QLatin1String(entity.name) < entityStr;
339}
340
341static QChar resolveEntity(const QStringRef &entity)
342{
343 const QTextHtmlEntity *start = &entities[0];
344 const QTextHtmlEntity *end = &entities[MAX_ENTITY];
345 const QTextHtmlEntity *e = std::lower_bound(start, end, entity);
346 if (e == end || (entity < *e))
347 return QChar();
348 return e->code;
349}
350
// Replacement code points for numeric character references in the range
// 0x80..0x9f, indexed by (value - 0x80). Entries marked "unchanged" map the
// value onto itself.
static const ushort windowsLatin1ExtendedCharacters[0xA0 - 0x80] = {
    0x20ac, // 0x80  euro sign
    0x0081, // 0x81  unchanged
    0x201a, // 0x82  single low-9 quotation mark
    0x0192, // 0x83  latin small letter f with hook
    0x201e, // 0x84  double low-9 quotation mark
    0x2026, // 0x85  horizontal ellipsis
    0x2020, // 0x86  dagger
    0x2021, // 0x87  double dagger
    0x02c6, // 0x88  modifier letter circumflex accent
    0x2030, // 0x89  per mille sign
    0x0160, // 0x8a  latin capital letter s with caron
    0x2039, // 0x8b  single left-pointing angle quotation mark
    0x0152, // 0x8c  latin capital ligature oe
    0x008d, // 0x8d  unchanged
    0x017d, // 0x8e  latin capital letter z with caron
    0x008f, // 0x8f  unchanged
    0x0090, // 0x90  unchanged
    0x2018, // 0x91  left single quotation mark
    0x2019, // 0x92  right single quotation mark
    0x201c, // 0x93  left double quotation mark
    0x201d, // 0x94  right double quotation mark
    0x2022, // 0x95  bullet
    0x2013, // 0x96  en dash
    0x2014, // 0x97  em dash
    0x02dc, // 0x98  small tilde
    0x2122, // 0x99  trade mark sign
    0x0161, // 0x9a  latin small letter s with caron
    0x203a, // 0x9b  single right-pointing angle quotation mark
    0x0153, // 0x9c  latin small ligature oe
    0x009d, // 0x9d  unchanged
    0x017e, // 0x9e  latin small letter z with caron
    0x0178  // 0x9f  latin capital letter y with diaeresis
};
385
386// the displayMode value is according to the what are blocks in the piecetable, not
387// what the w3c defines.
388static const QTextHtmlElement elements[Html_NumElements]= {
389 { "a", Html_a, QTextHtmlElement::DisplayInline },
390 { "address", Html_address, QTextHtmlElement::DisplayInline },
391 { "b", Html_b, QTextHtmlElement::DisplayInline },
392 { "big", Html_big, QTextHtmlElement::DisplayInline },
393 { "blockquote", Html_blockquote, QTextHtmlElement::DisplayBlock },
394 { "body", Html_body, QTextHtmlElement::DisplayBlock },
395 { "br", Html_br, QTextHtmlElement::DisplayInline },
396 { "caption", Html_caption, QTextHtmlElement::DisplayBlock },
397 { "center", Html_center, QTextHtmlElement::DisplayBlock },
398 { "cite", Html_cite, QTextHtmlElement::DisplayInline },
399 { "code", Html_code, QTextHtmlElement::DisplayInline },
400 { "dd", Html_dd, QTextHtmlElement::DisplayBlock },
401 { "dfn", Html_dfn, QTextHtmlElement::DisplayInline },
402 { "div", Html_div, QTextHtmlElement::DisplayBlock },
403 { "dl", Html_dl, QTextHtmlElement::DisplayBlock },
404 { "dt", Html_dt, QTextHtmlElement::DisplayBlock },
405 { "em", Html_em, QTextHtmlElement::DisplayInline },
406 { "font", Html_font, QTextHtmlElement::DisplayInline },
407 { "h1", Html_h1, QTextHtmlElement::DisplayBlock },
408 { "h2", Html_h2, QTextHtmlElement::DisplayBlock },
409 { "h3", Html_h3, QTextHtmlElement::DisplayBlock },
410 { "h4", Html_h4, QTextHtmlElement::DisplayBlock },
411 { "h5", Html_h5, QTextHtmlElement::DisplayBlock },
412 { "h6", Html_h6, QTextHtmlElement::DisplayBlock },
413 { "head", Html_head, QTextHtmlElement::DisplayNone },
414 { "hr", Html_hr, QTextHtmlElement::DisplayBlock },
415 { "html", Html_html, QTextHtmlElement::DisplayInline },
416 { "i", Html_i, QTextHtmlElement::DisplayInline },
417 { "img", Html_img, QTextHtmlElement::DisplayInline },
418 { "kbd", Html_kbd, QTextHtmlElement::DisplayInline },
419 { "li", Html_li, QTextHtmlElement::DisplayBlock },
420 { "link", Html_link, QTextHtmlElement::DisplayNone },
421 { "meta", Html_meta, QTextHtmlElement::DisplayNone },
422 { "nobr", Html_nobr, QTextHtmlElement::DisplayInline },
423 { "ol", Html_ol, QTextHtmlElement::DisplayBlock },
424 { "p", Html_p, QTextHtmlElement::DisplayBlock },
425 { "pre", Html_pre, QTextHtmlElement::DisplayBlock },
426 { "qt", Html_body /*deliberate mapping*/, QTextHtmlElement::DisplayBlock },
427 { "s", Html_s, QTextHtmlElement::DisplayInline },
428 { "samp", Html_samp, QTextHtmlElement::DisplayInline },
429 { "script", Html_script, QTextHtmlElement::DisplayNone },
430 { "small", Html_small, QTextHtmlElement::DisplayInline },
431 { "span", Html_span, QTextHtmlElement::DisplayInline },
432 { "strong", Html_strong, QTextHtmlElement::DisplayInline },
433 { "style", Html_style, QTextHtmlElement::DisplayNone },
434 { "sub", Html_sub, QTextHtmlElement::DisplayInline },
435 { "sup", Html_sup, QTextHtmlElement::DisplayInline },
436 { "table", Html_table, QTextHtmlElement::DisplayTable },
437 { "tbody", Html_tbody, QTextHtmlElement::DisplayTable },
438 { "td", Html_td, QTextHtmlElement::DisplayBlock },
439 { "tfoot", Html_tfoot, QTextHtmlElement::DisplayTable },
440 { "th", Html_th, QTextHtmlElement::DisplayBlock },
441 { "thead", Html_thead, QTextHtmlElement::DisplayTable },
442 { "title", Html_title, QTextHtmlElement::DisplayNone },
443 { "tr", Html_tr, QTextHtmlElement::DisplayTable },
444 { "tt", Html_tt, QTextHtmlElement::DisplayInline },
445 { "u", Html_u, QTextHtmlElement::DisplayInline },
446 { "ul", Html_ul, QTextHtmlElement::DisplayBlock },
447 { "var", Html_var, QTextHtmlElement::DisplayInline },
448};
449
450static bool operator<(const QString &str, const QTextHtmlElement &e)
451{
452 return str < QLatin1String(e.name);
453}
454
455static bool operator<(const QTextHtmlElement &e, const QString &str)
456{
457 return QLatin1String(e.name) < str;
458}
459
460static const QTextHtmlElement *lookupElementHelper(const QString &element)
461{
462 const QTextHtmlElement *start = &elements[0];
463 const QTextHtmlElement *end = &elements[Html_NumElements];
464 const QTextHtmlElement *e = std::lower_bound(start, end, element);
465 if ((e == end) || (element < *e))
466 return 0;
467 return e;
468}
469
470int QTextHtmlParser::lookupElement(const QString &element)
471{
472 const QTextHtmlElement *e = lookupElementHelper(element);
473 if (!e)
474 return -1;
475 return e->id;
476}
477
478// quotes newlines as "\\n"
479static QString quoteNewline(const QString &s)
480{
481 QString n = s;
482 n.replace(QLatin1Char('\n'), QLatin1String("\\n"));
483 return n;
484}
485
486QTextHtmlParserNode::QTextHtmlParserNode()
487 : parent(0), id(Html_unknown),
488 cssFloat(QTextFrameFormat::InFlow), hasOwnListStyle(false), hasOwnLineHeightType(false), hasLineHeightMultiplier(false),
489 hasCssListIndent(false), isEmptyParagraph(false), isTextFrame(false), isRootFrame(false),
490 displayMode(QTextHtmlElement::DisplayInline), hasHref(false),
491 listStyle(QTextListFormat::ListStyleUndefined), imageWidth(-1), imageHeight(-1), tableBorder(0),
492 tableCellRowSpan(1), tableCellColSpan(1), tableCellSpacing(2), tableCellPadding(0),
493 borderBrush(Qt::darkGray), borderStyle(QTextFrameFormat::BorderStyle_Outset),
494 userState(-1), cssListIndent(0), wsm(WhiteSpaceModeUndefined)
495{
496 margin[QTextHtmlParser::MarginLeft] = 0;
497 margin[QTextHtmlParser::MarginRight] = 0;
498 margin[QTextHtmlParser::MarginTop] = 0;
499 margin[QTextHtmlParser::MarginBottom] = 0;
500}
501
502void QTextHtmlParser::dumpHtml()
503{
504 for (int i = 0; i < count(); ++i) {
505 qDebug().nospace() << qPrintable(QString(depth(i)*4, QLatin1Char(' ')))
506 << qPrintable(at(i).tag) << ':'
507 << quoteNewline(at(i).text);
508 ;
509 }
510}
511
512QTextHtmlParserNode *QTextHtmlParser::newNode(int parent)
513{
514 QTextHtmlParserNode *lastNode = &nodes.last();
515 QTextHtmlParserNode *newNode = 0;
516
517 bool reuseLastNode = true;
518
519 if (nodes.count() == 1) {
520 reuseLastNode = false;
521 } else if (lastNode->tag.isEmpty()) {
522
523 if (lastNode->text.isEmpty()) {
524 reuseLastNode = true;
525 } else { // last node is a text node (empty tag) with some text
526
527 if (lastNode->text.length() == 1 && lastNode->text.at(0).isSpace()) {
528
529 int lastSibling = count() - 2;
530 while (lastSibling
531 && at(lastSibling).parent != lastNode->parent
532 && at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
533 lastSibling = at(lastSibling).parent;
534 }
535
536 if (at(lastSibling).displayMode == QTextHtmlElement::DisplayInline) {
537 reuseLastNode = false;
538 } else {
539 reuseLastNode = true;
540 }
541 } else {
542 // text node with real (non-whitespace) text -> nothing to re-use
543 reuseLastNode = false;
544 }
545
546 }
547
548 } else {
549 // last node had a proper tag -> nothing to re-use
550 reuseLastNode = false;
551 }
552
553 if (reuseLastNode) {
554 newNode = lastNode;
555 newNode->tag.clear();
556 newNode->text.clear();
557 newNode->id = Html_unknown;
558 } else {
559 nodes.resize(nodes.size() + 1);
560 newNode = &nodes.last();
561 }
562
563 newNode->parent = parent;
564 return newNode;
565}
566
567void QTextHtmlParser::parse(const QString &text, const QTextDocument *_resourceProvider)
568{
569 nodes.clear();
570 nodes.resize(1);
571 txt = text;
572 pos = 0;
573 len = txt.length();
574 textEditMode = false;
575 resourceProvider = _resourceProvider;
576 parse();
577 //dumpHtml();
578}
579
580int QTextHtmlParser::depth(int i) const
581{
582 int depth = 0;
583 while (i) {
584 i = at(i).parent;
585 ++depth;
586 }
587 return depth;
588}
589
590int QTextHtmlParser::margin(int i, int mar) const {
591 int m = 0;
592 const QTextHtmlParserNode *node;
593 if (mar == MarginLeft
594 || mar == MarginRight) {
595 while (i) {
596 node = &at(i);
597 if (!node->isBlock() && node->id != Html_table)
598 break;
599 if (node->isTableCell())
600 break;
601 m += node->margin[mar];
602 i = node->parent;
603 }
604 }
605 return m;
606}
607
608int QTextHtmlParser::topMargin(int i) const
609{
610 if (!i)
611 return 0;
612 return at(i).margin[MarginTop];
613}
614
615int QTextHtmlParser::bottomMargin(int i) const
616{
617 if (!i)
618 return 0;
619 return at(i).margin[MarginBottom];
620}
621
622void QTextHtmlParser::eatSpace()
623{
624 while (pos < len && txt.at(pos).isSpace() && txt.at(pos) != QChar::ParagraphSeparator)
625 pos++;
626}
627
628void QTextHtmlParser::parse()
629{
630 while (pos < len) {
631 QChar c = txt.at(pos++);
632 if (c == QLatin1Char('<')) {
633 parseTag();
634 } else if (c == QLatin1Char('&')) {
635 nodes.last().text += parseEntity();
636 } else {
637 nodes.last().text += c;
638 }
639 }
640}
641
// parses a tag after "<"
//
// Dispatches "<!...>" declarations/comments and "</...>" close tags to their
// own parsers. For an opening tag it creates the node, reads the tag name and
// attributes, lets resolveParent()/resolveNode() place and initialize the
// node, applies CSS declarations and attributes, and finally consumes the
// rest of the tag up to and including the '>'.
void QTextHtmlParser::parseTag()
{
    eatSpace();

    // handle comments and other exclamation mark declarations
    if (hasPrefix(QLatin1Char('!'))) {
        parseExclamationTag();
        // white space following the declaration is dropped, except in
        // white-space preserving content and in text edit mode
        if (nodes.last().wsm != QTextHtmlParserNode::WhiteSpacePre
            && nodes.last().wsm != QTextHtmlParserNode::WhiteSpacePreWrap
            && !textEditMode)
            eatSpace();
        return;
    }

    // if close tag just close
    if (hasPrefix(QLatin1Char('/'))) {
        // a <style> element is being closed: the text collected in it is the
        // style sheet, so parse it and remember it as an author style sheet
        if (nodes.last().id == Html_style) {
#ifndef QT_NO_CSSPARSER
            QCss::Parser parser(nodes.constLast().text);
            QCss::StyleSheet sheet;
            sheet.origin = QCss::StyleSheetOrigin_Author;
            parser.parse(&sheet, Qt::CaseInsensitive);
            inlineStyleSheets.append(sheet);
            resolveStyleSheetImports(sheet);
#endif
        }
        parseCloseTag();
        return;
    }

    // the provisional parent is the nearest node, starting at the last one
    // and walking up, that carries a tag (text nodes have an empty tag)
    int p = last();
    while (p && at(p).tag.size() == 0)
        p = at(p).parent;

    QTextHtmlParserNode *node = newNode(p);

    // parse tag name
    node->tag = parseWord().toLower();

    // unknown tags keep the display mode the node already has
    const QTextHtmlElement *elem = lookupElementHelper(node->tag);
    if (elem) {
        node->id = elem->id;
        node->displayMode = elem->displayMode;
    } else {
        node->id = Html_unknown;
    }

    node->attributes.clear();
    // _need_ at least one space after the tag name, otherwise there can't be attributes
    if (pos < len && txt.at(pos).isSpace())
        node->attributes = parseAttributes();

    // resolveParent() may have to change the order in the tree and
    // insert intermediate nodes for buggy HTML, so re-initialize the 'node'
    // pointer through the return value
    node = resolveParent();
    resolveNode();

#ifndef QT_NO_CSSPARSER
    const int nodeIndex = nodes.count() - 1; // this new node is always the last
    node->applyCssDeclarations(declarationsForNode(nodeIndex), resourceProvider);
#endif
    applyAttributes(node->attributes);

    // finish tag: skip everything up to the closing '>' and note whether a
    // '/' occurred on the way (as in "<br/>")
    bool tagClosed = false;
    while (pos < len && txt.at(pos) != QLatin1Char('>')) {
        if (txt.at(pos) == QLatin1Char('/'))
            tagClosed = true;

        pos++;
    }
    // step over the '>' (this also runs when the input ended before a '>'
    // was found, leaving pos beyond len)
    pos++;

    // in a white-space preserving environment strip off a initial newline
    // since the element itself already generates a newline
    if ((node->wsm == QTextHtmlParserNode::WhiteSpacePre
         || node->wsm == QTextHtmlParserNode::WhiteSpacePreWrap)
        && node->isBlock()) {
        // the newline is only skipped if it is not the very last character
        if (pos < len - 1 && txt.at(pos) == QLatin1Char('\n'))
            ++pos;
    }

    // elements that cannot have children and tags closed with '/' end right
    // here: continue with a new node below the same parent
    if (node->mayNotHaveChildren() || tagClosed) {
        newNode(node->parent);
        resolveNode();
    }
}
731
732// parses a tag beginning with "/"
733void QTextHtmlParser::parseCloseTag()
734{
735 ++pos;
736 QString tag = parseWord().toLower().trimmed();
737 while (pos < len) {
738 QChar c = txt.at(pos++);
739 if (c == QLatin1Char('>'))
740 break;
741 }
742
743 // find corresponding open node
744 int p = last();
745 if (p > 0
746 && at(p - 1).tag == tag
747 && at(p - 1).mayNotHaveChildren())
748 p--;
749
750 while (p && at(p).tag != tag)
751 p = at(p).parent;
752
753 // simply ignore the tag if we can't find
754 // a corresponding open node, for broken
755 // html such as <font>blah</font></font>
756 if (!p)
757 return;
758
759 // in a white-space preserving environment strip off a trailing newline
760 // since the closing of the opening block element will automatically result
761 // in a new block for elements following the <pre>
762 // ...foo\n</pre><p>blah -> foo</pre><p>blah
763 if ((at(p).wsm == QTextHtmlParserNode::WhiteSpacePre
764 || at(p).wsm == QTextHtmlParserNode::WhiteSpacePreWrap)
765 && at(p).isBlock()) {
766 if (at(last()).text.endsWith(QLatin1Char('\n')))
767 nodes[last()].text.chop(1);
768 }
769
770 newNode(at(p).parent);
771 resolveNode();
772}
773
774// parses a tag beginning with "!"
775void QTextHtmlParser::parseExclamationTag()
776{
777 ++pos;
778 if (hasPrefix(QLatin1Char('-'),1) && hasPrefix(QLatin1Char('-'),2)) {
779 pos += 3;
780 // eat comments
781 int end = txt.indexOf(QLatin1String("-->"), pos);
782 pos = (end >= 0 ? end + 3 : len);
783 } else {
784 // eat internal tags
785 while (pos < len) {
786 QChar c = txt.at(pos++);
787 if (c == QLatin1Char('>'))
788 break;
789 }
790 }
791}
792
793// parses an entity after "&", and returns it
794QString QTextHtmlParser::parseEntity()
795{
796 const int recover = pos;
797 int entityLen = 0;
798 QStringRef entity;
799 while (pos < len) {
800 QChar c = txt.at(pos++);
801 if (c.isSpace() || pos - recover > 9) {
802 goto error;
803 }
804 if (c == QLatin1Char(';'))
805 break;
806 ++entityLen;
807 }
808 if (entityLen) {
809 entity = QStringRef(&txt, recover, entityLen);
810 QChar resolved = resolveEntity(entity);
811 if (!resolved.isNull())
812 return QString(resolved);
813
814 if (entityLen > 1 && entity.at(0) == QLatin1Char('#')) {
815 entity = entity.mid(1); // removing leading #
816
817 int base = 10;
818 bool ok = false;
819
820 if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity?
821 entity = entity.mid(1);
822 base = 16;
823 }
824
825 uint uc = entity.toUInt(&ok, base);
826 if (ok) {
827 if (uc >= 0x80 && uc < 0x80 + (sizeof(windowsLatin1ExtendedCharacters)/sizeof(windowsLatin1ExtendedCharacters[0])))
828 uc = windowsLatin1ExtendedCharacters[uc - 0x80];
829 QString str;
830 if (QChar::requiresSurrogates(uc)) {
831 str += QChar(QChar::highSurrogate(uc));
832 str += QChar(QChar::lowSurrogate(uc));
833 } else {
834 str = QChar(uc);
835 }
836 return str;
837 }
838 }
839 }
840error:
841 pos = recover;
842 return QLatin1String("&");
843}
844
845// parses one word, possibly quoted, and returns it
846QString QTextHtmlParser::parseWord()
847{
848 QString word;
849 if (hasPrefix(QLatin1Char('\"'))) { // double quotes
850 ++pos;
851 while (pos < len) {
852 QChar c = txt.at(pos++);
853 if (c == QLatin1Char('\"'))
854 break;
855 else if (c == QLatin1Char('&'))
856 word += parseEntity();
857 else
858 word += c;
859 }
860 } else if (hasPrefix(QLatin1Char('\''))) { // single quotes
861 ++pos;
862 while (pos < len) {
863 QChar c = txt.at(pos++);
864 // Allow for escaped single quotes as they may be part of the string
865 if (c == QLatin1Char('\'') && (txt.length() > 1 && txt.at(pos - 2) != QLatin1Char('\\')))
866 break;
867 else
868 word += c;
869 }
870 } else { // normal text
871 while (pos < len) {
872 QChar c = txt.at(pos++);
873 if (c == QLatin1Char('>')
874 || (c == QLatin1Char('/') && hasPrefix(QLatin1Char('>'), 1))
875 || c == QLatin1Char('<')
876 || c == QLatin1Char('=')
877 || c.isSpace()) {
878 --pos;
879 break;
880 }
881 if (c == QLatin1Char('&'))
882 word += parseEntity();
883 else
884 word += c;
885 }
886 }
887 return word;
888}
889
// gives the new node the right parent
//
// Operates on the last node in 'nodes'. Starting from the parent the node
// was created with, it inserts missing <table>/<tr> nodes, applies the
// paragraph, self-nesting and context rules, stores the final parent in the
// node and registers the node in that parent's list of children.
//
// Inserting nodes may move the node storage, so callers must continue with
// the returned pointer rather than one obtained earlier.
QTextHtmlParserNode *QTextHtmlParser::resolveParent()
{
    QTextHtmlParserNode *node = &nodes.last();

    int p = node->parent;

    // Excel gives us buggy HTML with just tr without surrounding table tags
    // or with just td tags

    if (node->id == Html_td) {
        // look for an enclosing row
        int n = p;
        while (n && at(n).id != Html_tr)
            n = at(n).parent;

        if (!n) {
            // none found: place a table node and a row node directly in
            // front of the new node, which stays the last node
            nodes.insert(nodes.count() - 1, QTextHtmlParserNode());
            nodes.insert(nodes.count() - 1, QTextHtmlParserNode());

            QTextHtmlParserNode *table = &nodes[nodes.count() - 3];
            table->parent = p;
            table->id = Html_table;
            table->tag = QLatin1String("table");
            table->children.append(nodes.count() - 2); // add row as child

            QTextHtmlParserNode *row = &nodes[nodes.count() - 2];
            row->parent = nodes.count() - 3; // table as parent
            row->id = Html_tr;
            row->tag = QLatin1String("tr");

            // the cell becomes a child of the inserted row
            p = nodes.count() - 2;
            node = &nodes.last(); // re-initialize pointer
        }
    }

    if (node->id == Html_tr) {
        // look for an enclosing table
        int n = p;
        while (n && at(n).id != Html_table)
            n = at(n).parent;

        if (!n) {
            // none found: place a table node directly in front of the row
            nodes.insert(nodes.count() - 1, QTextHtmlParserNode());
            QTextHtmlParserNode *table = &nodes[nodes.count() - 2];
            table->parent = p;
            table->id = Html_table;
            table->tag = QLatin1String("table");
            p = nodes.count() - 2;
            node = &nodes.last(); // re-initialize pointer
        }
    }

    // permit invalid html by letting block elements be children
    // of inline elements with the exception of paragraphs:
    //
    // a new paragraph closes parent inline elements (while loop),
    // unless they themselves are children of a non-paragraph block
    // element (if statement)
    //
    // For example:
    //
    // <body><p><b>Foo<p>Bar <-- second <p> implicitly closes <b> that
    //                           belongs to the first <p>. The self-nesting
    //                           check further down prevents the second <p>
    //                           from nesting into the first one then.
    //                           so Bar is not bold.
    //
    // <body><b><p>Foo <-- Foo should be bold.
    //
    // <body><b><p>Foo<p>Bar <-- Foo and Bar should be bold.
    //
    if (node->id == Html_p) {
        while (p && !at(p).isBlock())
            p = at(p).parent;

        // the nearest block is not a paragraph: keep the original parent
        if (!p || at(p).id != Html_p)
            p = node->parent;
    }

    // some elements are not self nesting
    if (node->id == at(p).id
        && node->isNotSelfNesting())
        p = at(p).parent;

    // some elements are not allowed in certain contexts
    while ((p && !node->allowedInContext(at(p).id))
           // ### make new styles aware of empty tags
           || at(p).mayNotHaveChildren()
       ) {
        p = at(p).parent;
    }

    node->parent = p;

    // makes it easier to traverse the tree, later
    nodes[p].children.append(nodes.count() - 1);
    return node;
}
987
988// sets all properties on the new node
989void QTextHtmlParser::resolveNode()
990{
991 QTextHtmlParserNode *node = &nodes.last();
992 const QTextHtmlParserNode *parent = &nodes.at(node->parent);
993 node->initializeProperties(parent, this);
994}
995
996bool QTextHtmlParserNode::isNestedList(const QTextHtmlParser *parser) const
997{
998 if (!isListStart())
999 return false;
1000
1001 int p = parent;
1002 while (p) {
1003 if (parser->at(p).isListStart())
1004 return true;
1005 p = parser->at(p).parent;
1006 }
1007 return false;
1008}
1009
1010void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent, const QTextHtmlParser *parser)
1011{
1012 // inherit properties from parent element
1013 charFormat = parent->charFormat;
1014
1015 if (id == Html_html)
1016 blockFormat.setLayoutDirection(Qt::LeftToRight); // HTML default
1017 else if (parent->blockFormat.hasProperty(QTextFormat::LayoutDirection))
1018 blockFormat.setLayoutDirection(parent->blockFormat.layoutDirection());
1019
1020 if (parent->displayMode == QTextHtmlElement::DisplayNone)
1021 displayMode = QTextHtmlElement::DisplayNone;
1022
1023 if (parent->id != Html_table || id == Html_caption) {
1024 if (parent->blockFormat.hasProperty(QTextFormat::BlockAlignment))
1025 blockFormat.setAlignment(parent->blockFormat.alignment());
1026 else
1027 blockFormat.clearProperty(QTextFormat::BlockAlignment);
1028 }
1029 // we don't paint per-row background colors, yet. so as an
1030 // exception inherit the background color here
1031 // we also inherit the background between inline elements
1032 if ((parent->id != Html_tr || !isTableCell())
1033 && (displayMode != QTextHtmlElement::DisplayInline || parent->displayMode != QTextHtmlElement::DisplayInline)) {
1034 charFormat.clearProperty(QTextFormat::BackgroundBrush);
1035 }
1036
1037 listStyle = parent->listStyle;
1038 // makes no sense to inherit that property, a named anchor is a single point
1039 // in the document, which is set by the DocumentFragment
1040 charFormat.clearProperty(QTextFormat::AnchorName);
1041 wsm = parent->wsm;
1042
1043 // initialize remaining properties
1044 margin[QTextHtmlParser::MarginLeft] = 0;
1045 margin[QTextHtmlParser::MarginRight] = 0;
1046 margin[QTextHtmlParser::MarginTop] = 0;
1047 margin[QTextHtmlParser::MarginBottom] = 0;
1048 cssFloat = QTextFrameFormat::InFlow;
1049
1050 for (int i = 0; i < 4; ++i)
1051 padding[i] = -1;
1052
1053 // set element specific attributes
1054 switch (id) {
1055 case Html_a:
1056 for (int i = 0; i < attributes.count(); i += 2) {
1057 const QString key = attributes.at(i);
1058 if (key.compare(QLatin1String("href"), Qt::CaseInsensitive) == 0
1059 && !attributes.at(i + 1).isEmpty()) {
1060 hasHref = true;
1061 }
1062 }
1063 charFormat.setAnchor(true);
1064 break;
1065 case Html_big:
1066 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1));
1067 break;
1068 case Html_small:
1069 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1));
1070 break;
1071 case Html_h1:
1072 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(3));
1073 margin[QTextHtmlParser::MarginTop] = 18;
1074 margin[QTextHtmlParser::MarginBottom] = 12;
1075 break;
1076 case Html_h2:
1077 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(2));
1078 margin[QTextHtmlParser::MarginTop] = 16;
1079 margin[QTextHtmlParser::MarginBottom] = 12;
1080 break;
1081 case Html_h3:
1082 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(1));
1083 margin[QTextHtmlParser::MarginTop] = 14;
1084 margin[QTextHtmlParser::MarginBottom] = 12;
1085 break;
1086 case Html_h4:
1087 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(0));
1088 margin[QTextHtmlParser::MarginTop] = 12;
1089 margin[QTextHtmlParser::MarginBottom] = 12;
1090 break;
1091 case Html_h5:
1092 charFormat.setProperty(QTextFormat::FontSizeAdjustment, int(-1));
1093 margin[QTextHtmlParser::MarginTop] = 12;
1094 margin[QTextHtmlParser::MarginBottom] = 4;
1095 break;
1096 case Html_p:
1097 margin[QTextHtmlParser::MarginTop] = 12;
1098 margin[QTextHtmlParser::MarginBottom] = 12;
1099 break;
1100 case Html_ul:
1101 // nested lists don't have margins, except for the toplevel one
1102 if (!isNestedList(parser)) {
1103 margin[QTextHtmlParser::MarginTop] = 12;
1104 margin[QTextHtmlParser::MarginBottom] = 12;
1105 }
1106 // no left margin as we use indenting instead
1107 break;
1108 case Html_ol:
1109 // nested lists don't have margins, except for the toplevel one
1110 if (!isNestedList(parser)) {
1111 margin[QTextHtmlParser::MarginTop] = 12;
1112 margin[QTextHtmlParser::MarginBottom] = 12;
1113 }
1114 // no left margin as we use indenting instead
1115 break;
1116 case Html_br:
1117 text = QChar(QChar::LineSeparator);
1118 break;
1119 case Html_pre:
1120 margin[QTextHtmlParser::MarginTop] = 12;
1121 margin[QTextHtmlParser::MarginBottom] = 12;
1122 break;
1123 case Html_blockquote:
1124 margin[QTextHtmlParser::MarginTop] = 12;
1125 margin[QTextHtmlParser::MarginBottom] = 12;
1126 margin[QTextHtmlParser::MarginLeft] = 40;
1127 margin[QTextHtmlParser::MarginRight] = 40;
1128 blockFormat.setProperty(QTextFormat::BlockQuoteLevel, 1);
1129 break;
1130 case Html_dl:
1131 margin[QTextHtmlParser::MarginTop] = 8;
1132 margin[QTextHtmlParser::MarginBottom] = 8;
1133 break;
1134 case Html_dd:
1135 margin[QTextHtmlParser::MarginLeft] = 30;
1136 break;
1137 default: break;
1138 }
1139}
1140
1141#ifndef QT_NO_CSSPARSER
1142void QTextHtmlParserNode::setListStyle(const QVector<QCss::Value> &cssValues)
1143{
1144 for (int i = 0; i < cssValues.count(); ++i) {
1145 if (cssValues.at(i).type == QCss::Value::KnownIdentifier) {
1146 switch (static_cast<QCss::KnownValue>(cssValues.at(i).variant.toInt())) {
1147 case QCss::Value_None: hasOwnListStyle = true; listStyle = QTextListFormat::ListStyleUndefined; break;
1148 case QCss::Value_Disc: hasOwnListStyle = true; listStyle = QTextListFormat::ListDisc; break;
1149 case QCss::Value_Square: hasOwnListStyle = true; listStyle = QTextListFormat::ListSquare; break;
1150 case QCss::Value_Circle: hasOwnListStyle = true; listStyle = QTextListFormat::ListCircle; break;
1151 case QCss::Value_Decimal: hasOwnListStyle = true; listStyle = QTextListFormat::ListDecimal; break;
1152 case QCss::Value_LowerAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerAlpha; break;
1153 case QCss::Value_UpperAlpha: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperAlpha; break;
1154 case QCss::Value_LowerRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListLowerRoman; break;
1155 case QCss::Value_UpperRoman: hasOwnListStyle = true; listStyle = QTextListFormat::ListUpperRoman; break;
1156 default: break;
1157 }
1158 }
1159 }
1160 // allow individual list items to override the style
1161 if (id == Html_li && hasOwnListStyle)
1162 blockFormat.setProperty(QTextFormat::ListStyle, listStyle);
1163}
1164
/*
    Applies the CSS \a declarations to this node.

    Box properties (margin/padding) are extracted first, then every
    declaration is dispatched on its property id and written into the
    node's char format, block format or plain member fields. Font and
    background properties are extracted last, from the whole declaration
    list at once.

    \a resourceProvider is used to look up foreground textures and
    background images. It may be null, in which case those lookups are
    skipped.

    Declarations are processed in list order; line-height and
    -qt-line-height-type interact through hasOwnLineHeightType and
    hasLineHeightMultiplier, so their relative order matters.
*/
void QTextHtmlParserNode::applyCssDeclarations(const QVector<QCss::Declaration> &declarations, const QTextDocument *resourceProvider)
{
    QCss::ValueExtractor extractor(declarations);
    extractor.extractBox(margin, padding);

    for (int i = 0; i < declarations.count(); ++i) {
        const QCss::Declaration &decl = declarations.at(i);
        // every case below reads at least the first value
        if (decl.d->values.isEmpty()) continue;

        // known identifier of the first value, or UnknownValue if it is
        // not an identifier
        QCss::KnownValue identifier = QCss::UnknownValue;
        if (decl.d->values.first().type == QCss::Value::KnownIdentifier)
            identifier = static_cast<QCss::KnownValue>(decl.d->values.first().variant.toInt());

        switch (decl.d->propertyId) {
        case QCss::BorderColor: borderBrush = QBrush(decl.colorValue()); break;
        case QCss::BorderStyles:
            // NOTE(review): the "- 1" presumably maps QCss::BorderStyle onto
            // QTextFrameFormat::BorderStyle, assuming the two enums are
            // offset by one - confirm against both enum declarations
            if (decl.styleValue() != QCss::BorderStyle_Unknown && decl.styleValue() != QCss::BorderStyle_Native)
                borderStyle = static_cast<QTextFrameFormat::BorderStyle>(decl.styleValue() - 1);
            break;
        case QCss::BorderWidth:
            tableBorder = extractor.lengthValue(decl);
            break;
        case QCss::Color: charFormat.setForeground(decl.colorValue()); break;
        case QCss::Float:
            // anything other than left/right resets the node to in-flow
            cssFloat = QTextFrameFormat::InFlow;
            switch (identifier) {
            case QCss::Value_Left: cssFloat = QTextFrameFormat::FloatLeft; break;
            case QCss::Value_Right: cssFloat = QTextFrameFormat::FloatRight; break;
            default: break;
            }
            break;
        case QCss::QtBlockIndent:
            blockFormat.setIndent(decl.d->values.first().variant.toInt());
            break;
        case QCss::QtLineHeightType: {
            // unrecognized type names fall back to SingleHeight
            QString lineHeightTypeName = decl.d->values.first().variant.toString();
            QTextBlockFormat::LineHeightTypes lineHeightType;
            if (lineHeightTypeName.compare(QLatin1String("proportional"), Qt::CaseInsensitive) == 0)
                lineHeightType = QTextBlockFormat::ProportionalHeight;
            else if (lineHeightTypeName.compare(QLatin1String("fixed"), Qt::CaseInsensitive) == 0)
                lineHeightType = QTextBlockFormat::FixedHeight;
            else if (lineHeightTypeName.compare(QLatin1String("minimum"), Qt::CaseInsensitive) == 0)
                lineHeightType = QTextBlockFormat::MinimumHeight;
            else if (lineHeightTypeName.compare(QLatin1String("line-distance"), Qt::CaseInsensitive) == 0)
                lineHeightType = QTextBlockFormat::LineDistanceHeight;
            else
                lineHeightType = QTextBlockFormat::SingleHeight;

            // an earlier line-height declaration scaled a unitless number
            // by 100 (see the LineHeight case); undo that scaling now that
            // an explicit type is given
            if (hasLineHeightMultiplier) {
                qreal lineHeight = blockFormat.lineHeight() / 100.0;
                blockFormat.setProperty(QTextBlockFormat::LineHeight, lineHeight);
            }

            blockFormat.setProperty(QTextBlockFormat::LineHeightType, lineHeightType);
            hasOwnLineHeightType = true;
        }
            break;
        case QCss::LineHeight: {
            qreal lineHeight;
            QTextBlockFormat::LineHeightTypes lineHeightType;
            if (decl.realValue(&lineHeight, "px")) {
                // a pixel length is treated as a minimum line height
                lineHeightType = QTextBlockFormat::MinimumHeight;
            } else {
                bool ok;
                QCss::Value cssValue = decl.d->values.first();
                QString value = cssValue.toString();
                lineHeight = value.toDouble(&ok);
                if (ok) {
                    // a plain number without an explicit line height type is
                    // scaled by 100 and remembered as a multiplier
                    if (!hasOwnLineHeightType && cssValue.type == QCss::Value::Number) {
                        lineHeight *= 100.0;
                        hasLineHeightMultiplier = true;
                    }
                    lineHeightType = QTextBlockFormat::ProportionalHeight;
                } else {
                    // not parseable as a number: single line height
                    lineHeight = 0.0;
                    lineHeightType = QTextBlockFormat::SingleHeight;
                }
            }

            // Only override line height type if specified in same node
            if (hasOwnLineHeightType)
                lineHeightType = QTextBlockFormat::LineHeightTypes(blockFormat.lineHeightType());

            blockFormat.setLineHeight(lineHeight, lineHeightType);
            break;
        }
        case QCss::TextIndent: {
            // only pixel values are applied
            qreal indent = 0;
            if (decl.realValue(&indent, "px"))
                blockFormat.setTextIndent(indent);
            break; }
        case QCss::QtListIndent:
            if (decl.intValue(&cssListIndent))
                hasCssListIndent = true;
            break;
        case QCss::QtParagraphType:
            if (decl.d->values.first().variant.toString().compare(QLatin1String("empty"), Qt::CaseInsensitive) == 0)
                isEmptyParagraph = true;
            break;
        case QCss::QtTableType:
            // "frame" marks a text frame, "root" additionally marks the
            // root frame
            if (decl.d->values.first().variant.toString().compare(QLatin1String("frame"), Qt::CaseInsensitive) == 0)
                isTextFrame = true;
            else if (decl.d->values.first().variant.toString().compare(QLatin1String("root"), Qt::CaseInsensitive) == 0) {
                isTextFrame = true;
                isRootFrame = true;
            }
            break;
        case QCss::QtUserState:
            userState = decl.d->values.first().variant.toInt();
            break;
        case QCss::Whitespace:
            // unknown keywords leave the current white-space mode untouched
            switch (identifier) {
            case QCss::Value_Normal: wsm = QTextHtmlParserNode::WhiteSpaceNormal; break;
            case QCss::Value_Pre: wsm = QTextHtmlParserNode::WhiteSpacePre; break;
            case QCss::Value_NoWrap: wsm = QTextHtmlParserNode::WhiteSpaceNoWrap; break;
            case QCss::Value_PreWrap: wsm = QTextHtmlParserNode::WhiteSpacePreWrap; break;
            default: break;
            }
            break;
        case QCss::VerticalAlignment:
            // unlike most cases here, an unknown keyword resets the
            // alignment to AlignNormal
            switch (identifier) {
            case QCss::Value_Sub: charFormat.setVerticalAlignment(QTextCharFormat::AlignSubScript); break;
            case QCss::Value_Super: charFormat.setVerticalAlignment(QTextCharFormat::AlignSuperScript); break;
            case QCss::Value_Middle: charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle); break;
            case QCss::Value_Top: charFormat.setVerticalAlignment(QTextCharFormat::AlignTop); break;
            case QCss::Value_Bottom: charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom); break;
            default: charFormat.setVerticalAlignment(QTextCharFormat::AlignNormal); break;
            }
            break;
        case QCss::PageBreakBefore:
            // "always" sets the flag, "auto" clears it; the "after" flag is
            // preserved in both cases
            switch (identifier) {
            case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysBefore); break;
            case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysBefore); break;
            default: break;
            }
            break;
        case QCss::PageBreakAfter:
            // same as above, for the "after" flag
            switch (identifier) {
            case QCss::Value_Always: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() | QTextFormat::PageBreak_AlwaysAfter); break;
            case QCss::Value_Auto: blockFormat.setPageBreakPolicy(blockFormat.pageBreakPolicy() & ~QTextFormat::PageBreak_AlwaysAfter); break;
            default: break;
            }
            break;
        case QCss::TextUnderlineStyle:
            switch (identifier) {
            case QCss::Value_None: charFormat.setUnderlineStyle(QTextCharFormat::NoUnderline); break;
            case QCss::Value_Solid: charFormat.setUnderlineStyle(QTextCharFormat::SingleUnderline); break;
            case QCss::Value_Dashed: charFormat.setUnderlineStyle(QTextCharFormat::DashUnderline); break;
            case QCss::Value_Dotted: charFormat.setUnderlineStyle(QTextCharFormat::DotLine); break;
            case QCss::Value_DotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotLine); break;
            case QCss::Value_DotDotDash: charFormat.setUnderlineStyle(QTextCharFormat::DashDotDotLine); break;
            case QCss::Value_Wave: charFormat.setUnderlineStyle(QTextCharFormat::WaveUnderline); break;
            default: break;
            }
            break;
        case QCss::ListStyleType:
        case QCss::ListStyle:
            setListStyle(decl.d->values);
            break;
        case QCss::QtListNumberPrefix:
            textListNumberPrefix = decl.d->values.first().variant.toString();
            break;
        case QCss::QtListNumberSuffix:
            textListNumberSuffix = decl.d->values.first().variant.toString();
            break;
        case QCss::TextAlignment:
            // only left, center and right are handled here
            switch (identifier) {
            case QCss::Value_Left: blockFormat.setAlignment(Qt::AlignLeft); break;
            case QCss::Value_Center: blockFormat.setAlignment(Qt::AlignCenter); break;
            case QCss::Value_Right: blockFormat.setAlignment(Qt::AlignRight); break;
            default: break;
            }
            break;

        case QCss::QtForegroundTextureCacheKey:
        {
            // the value is a cache key that is looked up among the formats
            // of the resource provider's document
            if (resourceProvider != nullptr && resourceProvider->docHandle() != nullptr) {
                bool ok;
                qint64 searchKey = decl.d->values.first().variant.toLongLong(&ok);
                if (ok)
                    applyForegroundImage(searchKey, resourceProvider);
            }
            break;
        }
        default: break;
        }
    }

    // -255 is the initial value handed to extractFont(); the adjustment is
    // only stored when it ends up >= -1.
    // NOTE(review): presumably extractFont() leaves it untouched when no
    // relative font size keyword is present - confirm
    QFont f;
    int adjustment = -255;
    extractor.extractFont(&f, &adjustment);
    charFormat.setFont(f, QTextCharFormat::FontPropertiesSpecifiedOnly);

    if (adjustment >= -1)
        charFormat.setProperty(QTextFormat::FontSizeAdjustment, adjustment);

    {
        // only the brush and the image url of the background are used; the
        // remaining outputs are extracted but ignored
        Qt::Alignment ignoredAlignment;
        QCss::Repeat ignoredRepeat;
        QString bgImage;
        QBrush bgBrush;
        QCss::Origin ignoredOrigin, ignoredClip;
        QCss::Attachment ignoredAttachment;
        extractor.extractBackground(&bgBrush, &bgImage, &ignoredRepeat, &ignoredAlignment,
                                    &ignoredOrigin, &ignoredAttachment, &ignoredClip);

        // a background image wins over a plain brush when it can be resolved
        if (!bgImage.isEmpty() && resourceProvider) {
            applyBackgroundImage(bgImage, resourceProvider);
        } else if (bgBrush.style() != Qt::NoBrush) {
            charFormat.setBackground(bgBrush);
        }
    }
}
1378
1379#endif // QT_NO_CSSPARSER
1380
1381void QTextHtmlParserNode::applyForegroundImage(qint64 searchKey, const QTextDocument *resourceProvider)
1382{
1383 QTextDocumentPrivate *priv = resourceProvider->docHandle();
1384 for (int i = 0; i < priv->formats.numFormats(); ++i) {
1385 QTextCharFormat format = priv->formats.charFormat(i);
1386 if (format.isValid()) {
1387 QBrush brush = format.foreground();
1388 if (brush.style() == Qt::TexturePattern) {
1389 const bool isPixmap = qHasPixmapTexture(brush);
1390
1391 if (isPixmap && QCoreApplication::instance()->thread() != QThread::currentThread()) {
1392 qWarning("Can't apply QPixmap outside of GUI thread");
1393 return;
1394 }
1395
1396 const qint64 cacheKey = isPixmap ? brush.texture().cacheKey() : brush.textureImage().cacheKey();
1397 if (cacheKey == searchKey) {
1398 QBrush b;
1399 if (isPixmap)
1400 b.setTexture(brush.texture());
1401 else
1402 b.setTextureImage(brush.textureImage());
1403 b.setStyle(Qt::TexturePattern);
1404 charFormat.setForeground(b);
1405 }
1406 }
1407 }
1408 }
1409
1410}
1411
1412void QTextHtmlParserNode::applyBackgroundImage(const QString &url, const QTextDocument *resourceProvider)
1413{
1414 if (!url.isEmpty() && resourceProvider) {
1415 QVariant val = resourceProvider->resource(QTextDocument::ImageResource, url);
1416
1417 if (QCoreApplication::instance()->thread() != QThread::currentThread()) {
1418 // must use images in non-GUI threads
1419 if (val.type() == QVariant::Image) {
1420 QImage image = qvariant_cast<QImage>(val);
1421 charFormat.setBackground(image);
1422 } else if (val.type() == QVariant::ByteArray) {
1423 QImage image;
1424 if (image.loadFromData(val.toByteArray())) {
1425 charFormat.setBackground(image);
1426 }
1427 }
1428 } else {
1429 if (val.type() == QVariant::Image || val.type() == QVariant::Pixmap) {
1430 charFormat.setBackground(qvariant_cast<QPixmap>(val));
1431 } else if (val.type() == QVariant::ByteArray) {
1432 QPixmap pm;
1433 if (pm.loadFromData(val.toByteArray())) {
1434 charFormat.setBackground(pm);
1435 }
1436 }
1437 }
1438 }
1439 if (!url.isEmpty())
1440 charFormat.setProperty(QTextFormat::BackgroundImageUrl, url);
1441}
1442
1443bool QTextHtmlParserNode::hasOnlyWhitespace() const
1444{
1445 for (int i = 0; i < text.count(); ++i)
1446 if (!text.at(i).isSpace() || text.at(i) == QChar::LineSeparator)
1447 return false;
1448 return true;
1449}
1450
1451static bool setIntAttribute(int *destination, const QString &value)
1452{
1453 bool ok = false;
1454 int val = value.toInt(&ok);
1455 if (ok)
1456 *destination = val;
1457
1458 return ok;
1459}
1460
1461static bool setFloatAttribute(qreal *destination, const QString &value)
1462{
1463 bool ok = false;
1464 qreal val = value.toDouble(&ok);
1465 if (ok)
1466 *destination = val;
1467
1468 return ok;
1469}
1470
1471static void setWidthAttribute(QTextLength *width, const QString &valueStr)
1472{
1473 bool ok = false;
1474 qreal realVal = valueStr.toDouble(&ok);
1475 if (ok) {
1476 *width = QTextLength(QTextLength::FixedLength, realVal);
1477 } else {
1478 QStringRef value = QStringRef(&valueStr).trimmed();
1479 if (!value.isEmpty() && value.endsWith(QLatin1Char('%'))) {
1480 value.truncate(value.size() - 1);
1481 realVal = value.toDouble(&ok);
1482 if (ok)
1483 *width = QTextLength(QTextLength::PercentageLength, realVal);
1484 }
1485 }
1486}
1487
1488#ifndef QT_NO_CSSPARSER
1489void QTextHtmlParserNode::parseStyleAttribute(const QString &value, const QTextDocument *resourceProvider)
1490{
1491 const QString css = QLatin1String("* {") + value + QLatin1Char('}');
1492 QCss::Parser parser(css);
1493 QCss::StyleSheet sheet;
1494 parser.parse(&sheet, Qt::CaseInsensitive);
1495 if (sheet.styleRules.count() != 1) return;
1496 applyCssDeclarations(sheet.styleRules.at(0).declarations, resourceProvider);
1497}
1498#endif
1499
1500QStringList QTextHtmlParser::parseAttributes()
1501{
1502 QStringList attrs;
1503
1504 while (pos < len) {
1505 eatSpace();
1506 if (hasPrefix(QLatin1Char('>')) || hasPrefix(QLatin1Char('/')))
1507 break;
1508 QString key = parseWord().toLower();
1509 QString value = QLatin1String("1");
1510 if (key.size() == 0)
1511 break;
1512 eatSpace();
1513 if (hasPrefix(QLatin1Char('='))){
1514 pos++;
1515 eatSpace();
1516 value = parseWord();
1517 }
1518 if (value.size() == 0)
1519 continue;
1520 attrs << key << value;
1521 }
1522
1523 return attrs;
1524}
1525
/*
    Applies the HTML \a attributes to the most recently added node.

    \a attributes is a flat list of alternating keys and values; a list
    with an odd number of entries is ignored entirely. Each pair is first
    handled according to the node's tag (tag specific attributes) and then
    checked against the attributes that are handled for every tag: style,
    align, valign, dir, title and id.

    Keys are compared case-sensitively against lower-case literals.
    NOTE(review): this presumably relies on the keys having been
    lower-cased when the attributes were parsed - confirm against the
    callers.
*/
void QTextHtmlParser::applyAttributes(const QStringList &attributes)
{
    // local state variable for qt3 textedit mode
    bool seenQt3Richtext = false;
    // href/type of a <link> tag, collected in the loop and evaluated after it
    QString linkHref;
    QString linkType;

    if (attributes.count() % 2 == 1)
        return;

    QTextHtmlParserNode *node = &nodes.last();

    for (int i = 0; i < attributes.count(); i += 2) {
        QString key = attributes.at(i);
        QString value = attributes.at(i + 1);

        switch (node->id) {
        case Html_font:
            // the infamous font tag
            if (key == QLatin1String("size") && value.size()) {
                // a size with a leading '+' or '-' is used as the adjustment
                // directly; an unsigned size is made relative to 3
                int n = value.toInt();
                if (value.at(0) != QLatin1Char('+') && value.at(0) != QLatin1Char('-'))
                    n -= 3;
                node->charFormat.setProperty(QTextFormat::FontSizeAdjustment, n);
            } else if (key == QLatin1String("face")) {
                if (value.contains(QLatin1Char(','))) {
                    // comma separated list: set all trimmed families and use
                    // the first one as the family
                    const QStringList values = value.split(QLatin1Char(','));
                    QStringList families;
                    for (const QString &family : values)
                        families << family.trimmed();
                    node->charFormat.setFontFamilies(families);
                    node->charFormat.setFontFamily(families.at(0));
                } else {
                    node->charFormat.setFontFamily(value);
                }
            } else if (key == QLatin1String("color")) {
                // the color is applied even if it is invalid; only a warning
                // is emitted in that case
                QColor c; c.setNamedColor(value);
                if (!c.isValid())
                    qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
                node->charFormat.setForeground(c);
            }
            break;
        case Html_ol:
        case Html_ul:
            if (key == QLatin1String("type")) {
                // any type attribute marks the list as having its own style,
                // even if the value is not recognized below
                node->hasOwnListStyle = true;
                // the single letter types are case-sensitive
                if (value == QLatin1String("1")) {
                    node->listStyle = QTextListFormat::ListDecimal;
                } else if (value == QLatin1String("a")) {
                    node->listStyle = QTextListFormat::ListLowerAlpha;
                } else if (value == QLatin1String("A")) {
                    node->listStyle = QTextListFormat::ListUpperAlpha;
                } else if (value == QLatin1String("i")) {
                    node->listStyle = QTextListFormat::ListLowerRoman;
                } else if (value == QLatin1String("I")) {
                    node->listStyle = QTextListFormat::ListUpperRoman;
                } else {
                    // the keyword types are matched case-insensitively
                    value = std::move(value).toLower();
                    if (value == QLatin1String("square"))
                        node->listStyle = QTextListFormat::ListSquare;
                    else if (value == QLatin1String("disc"))
                        node->listStyle = QTextListFormat::ListDisc;
                    else if (value == QLatin1String("circle"))
                        node->listStyle = QTextListFormat::ListCircle;
                    else if (value == QLatin1String("none"))
                        node->listStyle = QTextListFormat::ListStyleUndefined;
                }
            }
            break;
        case Html_a:
            if (key == QLatin1String("href"))
                node->charFormat.setAnchorHref(value);
            else if (key == QLatin1String("name"))
                node->charFormat.setAnchorNames({value});
            break;
        case Html_img:
            if (key == QLatin1String("src") || key == QLatin1String("source")) {
                node->imageName = value;
            } else if (key == QLatin1String("width")) {
                node->imageWidth = -2; // register that there is a value for it.
                setFloatAttribute(&node->imageWidth, value);
            } else if (key == QLatin1String("height")) {
                node->imageHeight = -2; // register that there is a value for it.
                setFloatAttribute(&node->imageHeight, value);
            } else if (key == QLatin1String("alt")) {
                node->imageAlt = value;
            } else if (key == QLatin1String("title")) {
                // stored as the node's text; the generic "title" handling
                // after the switch additionally sets the tool tip
                node->text = value;
            }
            break;
        case Html_tr:
        case Html_body:
            if (key == QLatin1String("bgcolor")) {
                QColor c; c.setNamedColor(value);
                if (!c.isValid())
                    qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
                node->charFormat.setBackground(c);
            } else if (key == QLatin1String("background")) {
                node->applyBackgroundImage(value, resourceProvider);
            }
            break;
        case Html_th:
        case Html_td:
            if (key == QLatin1String("width")) {
                setWidthAttribute(&node->width, value);
            } else if (key == QLatin1String("bgcolor")) {
                QColor c; c.setNamedColor(value);
                if (!c.isValid())
                    qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
                node->charFormat.setBackground(c);
            } else if (key == QLatin1String("background")) {
                node->applyBackgroundImage(value, resourceProvider);
            } else if (key == QLatin1String("rowspan")) {
                // spans are clamped to at least 1
                if (setIntAttribute(&node->tableCellRowSpan, value))
                    node->tableCellRowSpan = qMax(1, node->tableCellRowSpan);
            } else if (key == QLatin1String("colspan")) {
                if (setIntAttribute(&node->tableCellColSpan, value))
                    node->tableCellColSpan = qMax(1, node->tableCellColSpan);
            }
            break;
        case Html_table:
            if (key == QLatin1String("border")) {
                setFloatAttribute(&node->tableBorder, value);
            } else if (key == QLatin1String("bgcolor")) {
                QColor c; c.setNamedColor(value);
                if (!c.isValid())
                    qWarning("QTextHtmlParser::applyAttributes: Unknown color name '%s'",value.toLatin1().constData());
                node->charFormat.setBackground(c);
            } else if (key == QLatin1String("background")) {
                node->applyBackgroundImage(value, resourceProvider);
            } else if (key == QLatin1String("cellspacing")) {
                setFloatAttribute(&node->tableCellSpacing, value);
            } else if (key == QLatin1String("cellpadding")) {
                setFloatAttribute(&node->tableCellPadding, value);
            } else if (key == QLatin1String("width")) {
                setWidthAttribute(&node->width, value);
            } else if (key == QLatin1String("height")) {
                setWidthAttribute(&node->height, value);
            }
            break;
        case Html_meta:
            // text edit mode is enabled by name="qrichtext" followed by
            // content="1" within the same tag; the order matters because
            // seenQt3Richtext is local to this call
            if (key == QLatin1String("name")
                && value == QLatin1String("qrichtext")) {
                seenQt3Richtext = true;
            }

            if (key == QLatin1String("content")
                && value == QLatin1String("1")
                && seenQt3Richtext) {

                textEditMode = true;
            }
            break;
        case Html_hr:
            if (key == QLatin1String("width"))
                setWidthAttribute(&node->width, value);
            break;
        case Html_link:
            // only remembered here; the import happens after the loop, once
            // both attributes are known
            if (key == QLatin1String("href"))
                linkHref = value;
            else if (key == QLatin1String("type"))
                linkType = value;
            break;
        case Html_pre:
            // class="language-xyz" stores "xyz" as the block's code language
            // (9 is the length of "language-")
            if (key == QLatin1String("class") && value.startsWith(QLatin1String("language-")))
                node->blockFormat.setProperty(QTextFormat::BlockCodeLanguage, value.mid(9));
            break;
        default:
            break;
        }

        // attributes that are handled regardless of the tag
        if (key == QLatin1String("style")) {
#ifndef QT_NO_CSSPARSER
            node->parseStyleAttribute(value, resourceProvider);
#endif
        } else if (key == QLatin1String("align")) {
            value = std::move(value).toLower();
            // stays true only if the value is one of the horizontal
            // alignment keywords below
            bool alignmentSet = true;

            if (value == QLatin1String("left"))
                node->blockFormat.setAlignment(Qt::AlignLeft|Qt::AlignAbsolute);
            else if (value == QLatin1String("right"))
                node->blockFormat.setAlignment(Qt::AlignRight|Qt::AlignAbsolute);
            else if (value == QLatin1String("center"))
                node->blockFormat.setAlignment(Qt::AlignHCenter);
            else if (value == QLatin1String("justify"))
                node->blockFormat.setAlignment(Qt::AlignJustify);
            else
                alignmentSet = false;

            if (node->id == Html_img) {
                // HTML4 compat
                // left/right alignment makes the image float; "middle" and
                // "top" are mapped to a vertical alignment instead
                if (alignmentSet) {
                    if (node->blockFormat.alignment() & Qt::AlignLeft)
                        node->cssFloat = QTextFrameFormat::FloatLeft;
                    else if (node->blockFormat.alignment() & Qt::AlignRight)
                        node->cssFloat = QTextFrameFormat::FloatRight;
                } else if (value == QLatin1String("middle")) {
                    node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
                } else if (value == QLatin1String("top")) {
                    node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
                }
            }
        } else if (key == QLatin1String("valign")) {
            value = std::move(value).toLower();
            if (value == QLatin1String("top"))
                node->charFormat.setVerticalAlignment(QTextCharFormat::AlignTop);
            else if (value == QLatin1String("middle"))
                node->charFormat.setVerticalAlignment(QTextCharFormat::AlignMiddle);
            else if (value == QLatin1String("bottom"))
                node->charFormat.setVerticalAlignment(QTextCharFormat::AlignBottom);
        } else if (key == QLatin1String("dir")) {
            value = std::move(value).toLower();
            if (value == QLatin1String("ltr"))
                node->blockFormat.setLayoutDirection(Qt::LeftToRight);
            else if (value == QLatin1String("rtl"))
                node->blockFormat.setLayoutDirection(Qt::RightToLeft);
        } else if (key == QLatin1String("title")) {
            node->charFormat.setToolTip(value);
        } else if (key == QLatin1String("id")) {
            // an id turns the element into a named anchor
            node->charFormat.setAnchor(true);
            node->charFormat.setAnchorNames({value});
        }
    }

#ifndef QT_NO_CSSPARSER
    // <link href="..." type="text/css">: import the referenced style sheet
    if (resourceProvider && !linkHref.isEmpty() && linkType == QLatin1String("text/css"))
        importStyleSheet(linkHref);
#endif
}
1756
1757#ifndef QT_NO_CSSPARSER
1758class QTextHtmlStyleSelector : public QCss::StyleSelector
1759{
1760public:
1761 inline QTextHtmlStyleSelector(const QTextHtmlParser *parser)
1762 : parser(parser) { nameCaseSensitivity = Qt::CaseInsensitive; }
1763
1764 virtual QStringList nodeNames(NodePtr node) const override;
1765 virtual QString attribute(NodePtr node, const QString &name) const override;
1766 virtual bool hasAttributes(NodePtr node) const override;
1767 virtual bool isNullNode(NodePtr node) const override;
1768 virtual NodePtr parentNode(NodePtr node) const override;
1769 virtual NodePtr previousSiblingNode(NodePtr node) const override;
1770 virtual NodePtr duplicateNode(NodePtr node) const override;
1771 virtual void freeNode(NodePtr node) const override;
1772
1773private:
1774 const QTextHtmlParser *parser;
1775};
1776
1777QStringList QTextHtmlStyleSelector::nodeNames(NodePtr node) const
1778{
1779 return QStringList(parser->at(node.id).tag.toLower());
1780}
1781
1782#endif // QT_NO_CSSPARSER
1783
1784#ifndef QT_NO_CSSPARSER
1785
1786static inline int findAttribute(const QStringList &attributes, const QString &name)
1787{
1788 int idx = -1;
1789 do {
1790 idx = attributes.indexOf(name, idx + 1);
1791 } while (idx != -1 && (idx % 2 == 1));
1792 return idx;
1793}
1794
1795QString QTextHtmlStyleSelector::attribute(NodePtr node, const QString &name) const
1796{
1797 const QStringList &attributes = parser->at(node.id).attributes;
1798 const int idx = findAttribute(attributes, name);
1799 if (idx == -1)
1800 return QString();
1801 return attributes.at(idx + 1);
1802}
1803
1804bool QTextHtmlStyleSelector::hasAttributes(NodePtr node) const
1805{
1806 const QStringList &attributes = parser->at(node.id).attributes;
1807 return !attributes.isEmpty();
1808}
1809
1810bool QTextHtmlStyleSelector::isNullNode(NodePtr node) const
1811{
1812 return node.id == 0;
1813}
1814
1815QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::parentNode(NodePtr node) const
1816{
1817 NodePtr parent;
1818 parent.id = 0;
1819 if (node.id) {
1820 parent.id = parser->at(node.id).parent;
1821 }
1822 return parent;
1823}
1824
1825QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::duplicateNode(NodePtr node) const
1826{
1827 return node;
1828}
1829
1830QCss::StyleSelector::NodePtr QTextHtmlStyleSelector::previousSiblingNode(NodePtr node) const
1831{
1832 NodePtr sibling;
1833 sibling.id = 0;
1834 if (!node.id)
1835 return sibling;
1836 int parent = parser->at(node.id).parent;
1837 if (!parent)
1838 return sibling;
1839 const int childIdx = parser->at(parent).children.indexOf(node.id);
1840 if (childIdx <= 0)
1841 return sibling;
1842 sibling.id = parser->at(parent).children.at(childIdx - 1);
1843 return sibling;
1844}
1845
/*
    Intentionally empty: duplicateNode() returns its argument by value, so
    there is nothing to release for a node handle.
*/
void QTextHtmlStyleSelector::freeNode(NodePtr) const
{
}
1849
1850void QTextHtmlParser::resolveStyleSheetImports(const QCss::StyleSheet &sheet)
1851{
1852 for (int i = 0; i < sheet.importRules.count(); ++i) {
1853 const QCss::ImportRule &rule = sheet.importRules.at(i);
1854 if (rule.media.isEmpty()
1855 || rule.media.contains(QLatin1String("screen"), Qt::CaseInsensitive))
1856 importStyleSheet(rule.href);
1857 }
1858}
1859
1860void QTextHtmlParser::importStyleSheet(const QString &href)
1861{
1862 if (!resourceProvider)
1863 return;
1864 for (int i = 0; i < externalStyleSheets.count(); ++i)
1865 if (externalStyleSheets.at(i).url == href)
1866 return;
1867
1868 QVariant res = resourceProvider->resource(QTextDocument::StyleSheetResource, href);
1869 QString css;
1870 if (res.type() == QVariant::String) {
1871 css = res.toString();
1872 } else if (res.type() == QVariant::ByteArray) {
1873 // #### detect @charset
1874 css = QString::fromUtf8(res.toByteArray());
1875 }
1876 if (!css.isEmpty()) {
1877 QCss::Parser parser(css);
1878 QCss::StyleSheet sheet;
1879 parser.parse(&sheet, Qt::CaseInsensitive);
1880 externalStyleSheets.append(ExternalStyleSheet(href, sheet));
1881 resolveStyleSheetImports(sheet);
1882 }
1883}
1884
1885QVector<QCss::Declaration> standardDeclarationForNode(const QTextHtmlParserNode &node)
1886{
1887 QVector<QCss::Declaration> decls;
1888 QCss::Declaration decl;
1889 QCss::Value val;
1890 switch (node.id) {
1891 case Html_a:
1892 case Html_u: {
1893 bool needsUnderline = (node.id == Html_u) ? true : false;
1894 if (node.id == Html_a) {
1895 for (int i = 0; i < node.attributes.count(); i += 2) {
1896 const QString key = node.attributes.at(i);
1897 if (key.compare(QLatin1String("href"), Qt::CaseInsensitive) == 0
1898 && !node.attributes.at(i + 1).isEmpty()) {
1899 needsUnderline = true;
1900 decl.d->property = QLatin1String("color");
1901 decl.d->propertyId = QCss::Color;
1902 val.type = QCss::Value::Color;
1903 val.variant = QVariant(QGuiApplication::palette().link());
1904 decl.d->values = QVector<QCss::Value>() << val;
1905 decl.d->inheritable = true;
1906 decls << decl;
1907 break;
1908 }
1909 }
1910 }
1911 if (needsUnderline) {
1912 decl = QCss::Declaration();
1913 decl.d->property = QLatin1String("text-decoration");
1914 decl.d->propertyId = QCss::TextDecoration;
1915 val.type = QCss::Value::KnownIdentifier;
1916 val.variant = QVariant(QCss::Value_Underline);
1917 decl.d->values = QVector<QCss::Value>() << val;
1918 decl.d->inheritable = true;
1919 decls << decl;
1920 }
1921 break;
1922 }
1923 case Html_b:
1924 case Html_strong:
1925 case Html_h1:
1926 case Html_h2:
1927 case Html_h3:
1928 case Html_h4:
1929 case Html_h5:
1930 case Html_th:
1931 decl = QCss::Declaration();
1932 decl.d->property = QLatin1String("font-weight");
1933 decl.d->propertyId = QCss::FontWeight;
1934 val.type = QCss::Value::KnownIdentifier;
1935 val.variant = QVariant(QCss::Value_Bold);
1936 decl.d->values = QVector<QCss::Value>() << val;
1937 decl.d->inheritable = true;
1938 decls << decl;
1939 if (node.id == Html_b || node.id == Html_strong)
1940 break;
1941 Q_FALLTHROUGH();
1942 case Html_big:
1943 case Html_small:
1944 if (node.id != Html_th) {
1945 decl = QCss::Declaration();
1946 decl.d->property = QLatin1String("font-size");
1947 decl.d->propertyId = QCss::FontSize;
1948 decl.d->inheritable = false;
1949 val.type = QCss::Value::KnownIdentifier;
1950 switch (node.id) {
1951 case Html_h1: val.variant = QVariant(QCss::Value_XXLarge); break;
1952 case Html_h2: val.variant = QVariant(QCss::Value_XLarge); break;
1953 case Html_h3: case Html_big: val.variant = QVariant(QCss::Value_Large); break;
1954 case Html_h4: val.variant = QVariant(QCss::Value_Medium); break;
1955 case Html_h5: case Html_small: val.variant = QVariant(QCss::Value_Small); break;
1956 default: break;
1957 }
1958 decl.d->values = QVector<QCss::Value>() << val;
1959 decls << decl;
1960 break;
1961 }
1962 Q_FALLTHROUGH();
1963 case Html_center:
1964 case Html_td:
1965 decl = QCss::Declaration();
1966 decl.d->property = QLatin1String("text-align");
1967 decl.d->propertyId = QCss::TextAlignment;
1968 val.type = QCss::Value::KnownIdentifier;
1969 val.variant = (node.id == Html_td) ? QVariant(QCss::Value_Left) : QVariant(QCss::Value_Center);
1970 decl.d->values = QVector<QCss::Value>() << val;
1971 decl.d->inheritable = true;
1972 decls << decl;
1973 break;
1974 case Html_s:
1975 decl = QCss::Declaration();
1976 decl.d->property = QLatin1String("text-decoration");
1977 decl.d->propertyId = QCss::TextDecoration;
1978 val.type = QCss::Value::KnownIdentifier;
1979 val.variant = QVariant(QCss::Value_LineThrough);
1980 decl.d->values = QVector<QCss::Value>() << val;
1981 decl.d->inheritable = true;
1982 decls << decl;
1983 break;
1984 case Html_em:
1985 case Html_i:
1986 case Html_cite:
1987 case Html_address:
1988 case Html_var:
1989 case Html_dfn:
1990 decl = QCss::Declaration();
1991 decl.d->property = QLatin1String("font-style");
1992 decl.d->propertyId = QCss::FontStyle;
1993 val.type = QCss::Value::KnownIdentifier;
1994 val.variant = QVariant(QCss::Value_Italic);
1995 decl.d->values = QVector<QCss::Value>() << val;
1996 decl.d->inheritable = true;
1997 decls << decl;
1998 break;
1999 case Html_sub:
2000 case Html_sup:
2001 decl = QCss::Declaration();
2002 decl.d->property = QLatin1String("vertical-align");
2003 decl.d->propertyId = QCss::VerticalAlignment;
2004 val.type = QCss::Value::KnownIdentifier;
2005 val.variant = (node.id == Html_sub) ? QVariant(QCss::Value_Sub) : QVariant(QCss::Value_Super);
2006 decl.d->values = QVector<QCss::Value>() << val;
2007 decl.d->inheritable = true;
2008 decls << decl;
2009 break;
2010 case Html_ul:
2011 case Html_ol:
2012 decl = QCss::Declaration();
2013 decl.d->property = QLatin1String("list-style");
2014 decl.d->propertyId = QCss::ListStyle;
2015 val.type = QCss::Value::KnownIdentifier;
2016 val.variant = (node.id == Html_ul) ? QVariant(QCss::Value_Disc) : QVariant(QCss::Value_Decimal);
2017 decl.d->values = QVector<QCss::Value>() << val;
2018 decl.d->inheritable = true;
2019 decls << decl;
2020 break;
2021 case Html_code:
2022 case Html_tt:
2023 case Html_kbd:
2024 case Html_samp:
2025 case Html_pre: {
2026 decl = QCss::Declaration();
2027 decl.d->property = QLatin1String("font-family");
2028 decl.d->propertyId = QCss::FontFamily;
2029 QVector<QCss::Value> values;
2030 val.type = QCss::Value::String;
2031 val.variant = QFontDatabase::systemFont(QFontDatabase::FixedFont).family();
2032 values << val;
2033 decl.d->values = values;
2034 decl.d->inheritable = true;
2035 decls << decl;
2036 }
2037 if (node.id != Html_pre)
2038 break;
2039 Q_FALLTHROUGH();
2040 case Html_br:
2041 case Html_nobr:
2042 decl = QCss::Declaration();
2043 decl.d->property = QLatin1String("whitespace");
2044 decl.d->propertyId = QCss::Whitespace;
2045 val.type = QCss::Value::KnownIdentifier;
2046 switch (node.id) {
2047 case Html_br: val.variant = QVariant(QCss::Value_PreWrap); break;
2048 case Html_nobr: val.variant = QVariant(QCss::Value_NoWrap); break;
2049 case Html_pre: val.variant = QVariant(QCss::Value_Pre); break;
2050 default: break;
2051 }
2052 decl.d->values = QVector<QCss::Value>() << val;
2053 decl.d->inheritable = true;
2054 decls << decl;
2055 break;
2056 default:
2057 break;
2058 }
2059 return decls;
2060}
2061
2062QVector<QCss::Declaration> QTextHtmlParser::declarationsForNode(int node) const
2063{
2064 QVector<QCss::Declaration> decls;
2065
2066 QTextHtmlStyleSelector selector(this);
2067
2068 int idx = 0;
2069 selector.styleSheets.resize((resourceProvider ? 1 : 0)
2070 + externalStyleSheets.count()
2071 + inlineStyleSheets.count());
2072 if (resourceProvider)
2073 selector.styleSheets[idx++] = resourceProvider->docHandle()->parsedDefaultStyleSheet;
2074
2075 for (int i = 0; i < externalStyleSheets.count(); ++i, ++idx)
2076 selector.styleSheets[idx] = externalStyleSheets.at(i).sheet;
2077
2078 for (int i = 0; i < inlineStyleSheets.count(); ++i, ++idx)
2079 selector.styleSheets[idx] = inlineStyleSheets.at(i);
2080
2081 selector.medium = QLatin1String("screen");
2082
2083 QCss::StyleSelector::NodePtr n;
2084 n.id = node;
2085
2086 const char *extraPseudo = 0;
2087 if (nodes.at(node).id == Html_a && nodes.at(node).hasHref)
2088 extraPseudo = "link";
2089 // Ensure that our own style is taken into consideration
2090 decls = standardDeclarationForNode(nodes.at(node));
2091 decls += selector.declarationsForNode(n, extraPseudo);
2092 n = selector.parentNode(n);
2093 while (!selector.isNullNode(n)) {
2094 QVector<QCss::Declaration> inheritedDecls;
2095 inheritedDecls = selector.declarationsForNode(n, extraPseudo);
2096 for (int i = 0; i < inheritedDecls.size(); ++i) {
2097 const QCss::Declaration &decl = inheritedDecls.at(i);
2098 if (decl.d->inheritable)
2099 decls.prepend(decl);
2100 }
2101 n = selector.parentNode(n);
2102 }
2103 return decls;
2104}
2105
2106bool QTextHtmlParser::nodeIsChildOf(int i, QTextHTMLElements id) const
2107{
2108 while (i) {
2109 if (at(i).id == id)
2110 return true;
2111 i = at(i).parent;
2112 }
2113 return false;
2114}
2115
2116QT_END_NAMESPACE
2117#endif // QT_NO_CSSPARSER
2118
2119#endif // QT_NO_TEXTHTMLPARSER
2120