Warning: That file was not part of the compilation database. It may have many parsing errors.

1/****************************************************************************
2**
3** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4** Contact: http://www.qt-project.org/legal
5**
6** This file is part of the tools applications of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Digia. For licensing terms and
14** conditions see http://qt.digia.com/licensing. For further information
15** use the contact form at http://qt.digia.com/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 2.1 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 2.1 requirements
23** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24**
25** In addition, as a special exception, Digia gives you certain additional
26** rights. These rights are described in the Digia Qt LGPL Exception
27** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28**
29** GNU General Public License Usage
30** Alternatively, this file may be used under the terms of the GNU
31** General Public License version 3.0 as published by the Free Software
32** Foundation and appearing in the file LICENSE.GPL included in the
33** packaging of this file. Please review the following information to
34** ensure the GNU General Public License version 3.0 requirements will be
35** met: http://www.gnu.org/copyleft/gpl.html.
36**
37**
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "config.h"
43#include "tokenizer.h"
44
45#include <qfile.h>
46#include <qhash.h>
47#include <qregexp.h>
48#include <qstring.h>
49#include <qtextcodec.h>
50
51#include <ctype.h>
52#include <string.h>
53
54QT_BEGIN_NAMESPACE
55
56#define LANGUAGE_CPP "Cpp"
57
58/* qmake ignore Q_OBJECT */
59
/*
  Spellings of the keywords recognized by the tokenizer.

  Keep in sync with tokenizer.h: entry number i must be the spelling of
  token (Tok_FirstKeyword + i), and the table must cover the whole range
  Tok_FirstKeyword..Tok_LastKeyword, because getToken() derives a keyword's
  token value from its position in this table.

  Both the strings and the table itself are immutable.
*/
static const char * const kwords[] = {
    "char", "class", "const", "double", "enum", "explicit",
    "friend", "inline", "int", "long", "namespace", "operator",
    "private", "protected", "public", "short", "signals", "signed",
    "slots", "static", "struct", "template", "typedef", "typename",
    "union", "unsigned", "using", "virtual", "void", "volatile",
    "__int64",
    "Q_OBJECT",
    "Q_OVERRIDE",
    "Q_PROPERTY",
    "Q_PRIVATE_PROPERTY",
    "Q_DECLARE_SEQUENTIAL_ITERATOR",
    "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
    "Q_DECLARE_ASSOCIATIVE_ITERATOR",
    "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
    "Q_DECLARE_FLAGS",
    "Q_SIGNALS",
    "Q_SLOTS",
    "QT_COMPAT",
    "QT_COMPAT_CONSTRUCTOR",
    "QT_DEPRECATED",
    "QT_MOC_COMPAT",
    "QT_MODULE",
    "QT3_SUPPORT",
    "QT3_SUPPORT_CONSTRUCTOR",
    "QT3_MOC_SUPPORT",
    "QDOC_PROPERTY"
};
91
// Number of slots in the keyword hash table.
static const int KwordHashTableSize = 4096;
// Keyword hash table. A slot holds 0 when it is empty, -1 for an ignored
// token or directive, and otherwise the 1-based index of a keyword in
// kwords[].
static int kwordHashTable[KwordHashTableSize];

// Identifiers to ignore, filled in by initialize(). The value is true for
// a directive (its parenthesized arguments are skipped as well) and false
// for a plain token.
static QHash<QByteArray, bool> *ignoredTokensAndDirectives = 0;

// Matches a C or C++ comment; used to strip comments from the text of a
// preprocessor directive.
static QRegExp *comment = 0;
// Matches the text of a #define of the configured version symbol and
// captures the quoted version string.
static QRegExp *versionX = 0;
// Matches a "defined(X)" test inside an #if condition and captures X.
static QRegExp *definedX = 0;

// Alternation of the symbols considered defined (the configured defines
// plus "qdoc").
static QRegExp *defines = 0;
// Alternation of the configured conditions that evaluate as false.
static QRegExp *falsehoods = 0;

// Codec used to convert lexemes to Unicode; set by initialize().
static QTextCodec *sourceCodec = 0;
105
106/*
107 This function is a perfect hash function for the 37 keywords of C99
108 (with a hash table size of 512). It should perform well on our
109 Qt-enhanced C++ subset.
110*/
111static int hashKword(const char *s, int len)
112{
113 return (((uchar) s[0]) + (((uchar) s[2]) << 5) +
114 (((uchar) s[len - 1]) << 3)) % KwordHashTableSize;
115}
116
117static void insertKwordIntoHash(const char *s, int number)
118{
119 int k = hashKword(s, strlen(s));
120 while (kwordHashTable[k]) {
121 if (++k == KwordHashTableSize)
122 k = 0;
123 }
124 kwordHashTable[k] = number;
125}
126
127Tokenizer::Tokenizer(const Location& loc, QFile &in)
128{
129 init();
130 yyIn = in.readAll();
131 yyPos = 0;
132 start(loc);
133}
134
135Tokenizer::Tokenizer(const Location& loc, const QByteArray &in)
136 : yyIn(in)
137{
138 init();
139 yyPos = 0;
140 start(loc);
141}
142
143Tokenizer::~Tokenizer()
144{
145 delete[] yyLexBuf1;
146 delete[] yyLexBuf2;
147}
148
149int Tokenizer::getToken()
150{
151 char *t = yyPrevLex;
152 yyPrevLex = yyLex;
153 yyLex = t;
154
155 while (yyCh != EOF) {
156 yyTokLoc = yyCurLoc;
157 yyLexLen = 0;
158
159 if (isspace(yyCh)) {
160 do {
161 yyCh = getChar();
162 } while (isspace(yyCh));
163 }
164 else if (isalpha(yyCh) || yyCh == '_') {
165 do {
166 yyCh = getChar();
167 } while (isalnum(yyCh) || yyCh == '_');
168
169 int k = hashKword(yyLex, yyLexLen);
170 for (;;) {
171 int i = kwordHashTable[k];
172 if (i == 0) {
173 return Tok_Ident;
174 }
175 else if (i == -1) {
176 if (!parsingMacro && ignoredTokensAndDirectives->contains(yyLex)) {
177 if (ignoredTokensAndDirectives->value(yyLex)) { // it's a directive
178 int parenDepth = 0;
179 while (yyCh != EOF && (yyCh != ')' || parenDepth > 1)) {
180 if (yyCh == '(')
181 ++parenDepth;
182 else if (yyCh == ')')
183 --parenDepth;
184 yyCh = getChar();
185 }
186 if (yyCh == ')')
187 yyCh = getChar();
188 }
189 break;
190 }
191 }
192 else if (strcmp(yyLex, kwords[i - 1]) == 0) {
193 int ret = (int) Tok_FirstKeyword + i - 1;
194 if (ret != Tok_explicit && ret != Tok_inline && ret != Tok_typename)
195 return ret;
196 break;
197 }
198
199 if (++k == KwordHashTableSize)
200 k = 0;
201 }
202 }
203 else if (isdigit(yyCh)) {
204 do {
205 yyCh = getChar();
206 } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
207 yyCh == '-');
208 return Tok_Number;
209 }
210 else {
211 switch (yyCh) {
212 case '!':
213 case '%':
214 yyCh = getChar();
215 if (yyCh == '=')
216 yyCh = getChar();
217 return Tok_SomeOperator;
218 case '"':
219 yyCh = getChar();
220
221 while (yyCh != EOF && yyCh != '"') {
222 if (yyCh == '\\')
223 yyCh = getChar();
224 yyCh = getChar();
225 }
226 yyCh = getChar();
227
228 if (yyCh == EOF)
229 yyTokLoc.warning(tr("Unterminated C++ string literal"),
230 tr("Maybe you forgot '/*!' at the beginning of the file?"));
231 else
232 return Tok_String;
233 break;
234 case '#':
235 return getTokenAfterPreprocessor();
236 case '&':
237 yyCh = getChar();
238 if (yyCh == '&' || yyCh == '=') {
239 yyCh = getChar();
240 return Tok_SomeOperator;
241 }
242 else {
243 return Tok_Ampersand;
244 }
245 case '\'':
246 yyCh = getChar();
247 if (yyCh == '\\')
248 yyCh = getChar();
249 do {
250 yyCh = getChar();
251 } while (yyCh != EOF && yyCh != '\'');
252
253 if (yyCh == EOF) {
254 yyTokLoc.warning(tr("Unterminated C++ character"
255 " literal"));
256 }
257 else {
258 yyCh = getChar();
259 return Tok_Number;
260 }
261 break;
262 case '(':
263 yyCh = getChar();
264 if (yyNumPreprocessorSkipping == 0)
265 yyParenDepth++;
266 if (isspace(yyCh)) {
267 do {
268 yyCh = getChar();
269 } while (isspace(yyCh));
270 yyLexLen = 1;
271 yyLex[1] = '\0';
272 }
273 if (yyCh == '*') {
274 yyCh = getChar();
275 return Tok_LeftParenAster;
276 }
277 return Tok_LeftParen;
278 case ')':
279 yyCh = getChar();
280 if (yyNumPreprocessorSkipping == 0)
281 yyParenDepth--;
282 return Tok_RightParen;
283 case '*':
284 yyCh = getChar();
285 if (yyCh == '=') {
286 yyCh = getChar();
287 return Tok_SomeOperator;
288 } else {
289 return Tok_Aster;
290 }
291 case '^':
292 yyCh = getChar();
293 if (yyCh == '=') {
294 yyCh = getChar();
295 return Tok_SomeOperator;
296 } else {
297 return Tok_Caret;
298 }
299 case '+':
300 yyCh = getChar();
301 if (yyCh == '+' || yyCh == '=')
302 yyCh = getChar();
303 return Tok_SomeOperator;
304 case ',':
305 yyCh = getChar();
306 return Tok_Comma;
307 case '-':
308 yyCh = getChar();
309 if (yyCh == '-' || yyCh == '=') {
310 yyCh = getChar();
311 } else if (yyCh == '>') {
312 yyCh = getChar();
313 if (yyCh == '*')
314 yyCh = getChar();
315 }
316 return Tok_SomeOperator;
317 case '.':
318 yyCh = getChar();
319 if (yyCh == '*') {
320 yyCh = getChar();
321 } else if (yyCh == '.') {
322 do {
323 yyCh = getChar();
324 } while (yyCh == '.');
325 return Tok_Ellipsis;
326 } else if (isdigit(yyCh)) {
327 do {
328 yyCh = getChar();
329 } while (isalnum(yyCh) || yyCh == '.' || yyCh == '+' ||
330 yyCh == '-');
331 return Tok_Number;
332 }
333 return Tok_SomeOperator;
334 case '/':
335 yyCh = getChar();
336 if (yyCh == '/') {
337 do {
338 yyCh = getChar();
339 } while (yyCh != EOF && yyCh != '\n');
340 } else if (yyCh == '*') {
341 bool metDoc = false; // empty doc is no doc
342 bool metSlashAsterBang = false;
343 bool metAster = false;
344 bool metAsterSlash = false;
345
346 yyCh = getChar();
347 if (yyCh == '!')
348 metSlashAsterBang = true;
349
350 while (!metAsterSlash) {
351 if (yyCh == EOF) {
352 yyTokLoc.warning(tr("Unterminated C++ comment"));
353 break;
354 } else {
355 if (yyCh == '*') {
356 metAster = true;
357 } else if (metAster && yyCh == '/') {
358 metAsterSlash = true;
359 } else {
360 metAster = false;
361 if (isgraph(yyCh))
362 metDoc = true;
363 }
364 }
365 yyCh = getChar();
366 }
367 if (metSlashAsterBang && metDoc)
368 return Tok_Doc;
369 else if (yyParenDepth > 0)
370 return Tok_Comment;
371 } else {
372 if (yyCh == '=')
373 yyCh = getChar();
374 return Tok_SomeOperator;
375 }
376 break;
377 case ':':
378 yyCh = getChar();
379 if (yyCh == ':') {
380 yyCh = getChar();
381 return Tok_Gulbrandsen;
382 } else {
383 return Tok_Colon;
384 }
385 case ';':
386 yyCh = getChar();
387 return Tok_Semicolon;
388 case '<':
389 yyCh = getChar();
390 if (yyCh == '<') {
391 yyCh = getChar();
392 if (yyCh == '=')
393 yyCh = getChar();
394 return Tok_SomeOperator;
395 } else if (yyCh == '=') {
396 yyCh = getChar();
397 return Tok_SomeOperator;
398 } else {
399 return Tok_LeftAngle;
400 }
401 case '=':
402 yyCh = getChar();
403 if (yyCh == '=') {
404 yyCh = getChar();
405 return Tok_SomeOperator;
406 } else {
407 return Tok_Equal;
408 }
409 case '>':
410 yyCh = getChar();
411 if (yyCh == '>') {
412 yyCh = getChar();
413 if (yyCh == '=')
414 yyCh = getChar();
415 return Tok_SomeOperator;
416 } else if (yyCh == '=') {
417 yyCh = getChar();
418 return Tok_SomeOperator;
419 } else {
420 return Tok_RightAngle;
421 }
422 case '?':
423 yyCh = getChar();
424 return Tok_SomeOperator;
425 case '[':
426 yyCh = getChar();
427 if (yyNumPreprocessorSkipping == 0)
428 yyBracketDepth++;
429 return Tok_LeftBracket;
430 case '\\':
431 yyCh = getChar();
432 yyCh = getChar(); // skip one character
433 break;
434 case ']':
435 yyCh = getChar();
436 if (yyNumPreprocessorSkipping == 0)
437 yyBracketDepth--;
438 return Tok_RightBracket;
439 case '{':
440 yyCh = getChar();
441 if (yyNumPreprocessorSkipping == 0)
442 yyBraceDepth++;
443 return Tok_LeftBrace;
444 case '}':
445 yyCh = getChar();
446 if (yyNumPreprocessorSkipping == 0)
447 yyBraceDepth--;
448 return Tok_RightBrace;
449 case '|':
450 yyCh = getChar();
451 if (yyCh == '|' || yyCh == '=')
452 yyCh = getChar();
453 return Tok_SomeOperator;
454 case '~':
455 yyCh = getChar();
456 return Tok_Tilde;
457 case '@':
458 yyCh = getChar();
459 return Tok_At;
460 default:
461 // ### We should really prevent qdoc from looking at snippet files rather than
462 // ### suppress warnings when reading them.
463 if (yyNumPreprocessorSkipping == 0 && !yyTokLoc.fileName().endsWith(".qdoc")) {
464 yyTokLoc.warning(tr("Hostile character 0x%1 in C++ source")
465 .arg((uchar)yyCh, 1, 16));
466 }
467 yyCh = getChar();
468 }
469 }
470 }
471
472 if (yyPreprocessorSkipping.count() > 1) {
473 yyTokLoc.warning(tr("Expected #endif before end of file"));
474 // clear it out or we get an infinite loop!
475 while (!yyPreprocessorSkipping.isEmpty()) {
476 popSkipping();
477 }
478 }
479
480 strcpy(yyLex, "end-of-input");
481 yyLexLen = strlen(yyLex);
482 return Tok_Eoi;
483}
484
485void Tokenizer::initialize(const Config &config)
486{
487 QString versionSym = config.getString(CONFIG_VERSIONSYM);
488
489 QString sourceEncoding = config.getString(CONFIG_SOURCEENCODING);
490 if (sourceEncoding.isEmpty())
491 sourceEncoding = QLatin1String("ISO-8859-1");
492 sourceCodec = QTextCodec::codecForName(sourceEncoding.toLocal8Bit());
493
494 comment = new QRegExp("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)");
495 comment->setMinimal(true);
496 versionX = new QRegExp("$cannot possibly match^");
497 if (!versionSym.isEmpty())
498 versionX->setPattern("[ \t]*(?:" + QRegExp::escape(versionSym)
499 + ")[ \t]+\"([^\"]*)\"[ \t]*");
500 definedX = new QRegExp("defined ?\\(?([A-Z_0-9a-z]+) ?\\)");
501
502 QStringList d = config.getStringList(CONFIG_DEFINES);
503 d += "qdoc";
504 defines = new QRegExp(d.join("|"));
505 falsehoods = new QRegExp(config.getStringList(CONFIG_FALSEHOODS).join("|"));
506
507 memset(kwordHashTable, 0, sizeof(kwordHashTable));
508 for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++)
509 insertKwordIntoHash(kwords[i], i + 1);
510
511 ignoredTokensAndDirectives = new QHash<QByteArray, bool>;
512
513 QStringList tokens = config.getStringList(LANGUAGE_CPP + Config::dot + CONFIG_IGNORETOKENS);
514 foreach (const QString &t, tokens) {
515 const QByteArray tb = t.toAscii();
516 ignoredTokensAndDirectives->insert(tb, false);
517 insertKwordIntoHash(tb.data(), -1);
518 }
519
520 QStringList directives = config.getStringList(LANGUAGE_CPP + Config::dot
521 + CONFIG_IGNOREDIRECTIVES);
522 foreach (const QString &d, directives) {
523 const QByteArray db = d.toAscii();
524 ignoredTokensAndDirectives->insert(db, true);
525 insertKwordIntoHash(db.data(), -1);
526 }
527}
528
529void Tokenizer::terminate()
530{
531 delete comment;
532 comment = 0;
533 delete versionX;
534 versionX = 0;
535 delete definedX;
536 definedX = 0;
537 delete defines;
538 defines = 0;
539 delete falsehoods;
540 falsehoods = 0;
541 delete ignoredTokensAndDirectives;
542 ignoredTokensAndDirectives = 0;
543}
544
545void Tokenizer::init()
546{
547 yyLexBuf1 = new char[(int) yyLexBufSize];
548 yyLexBuf2 = new char[(int) yyLexBufSize];
549 yyPrevLex = yyLexBuf1;
550 yyPrevLex[0] = '\0';
551 yyLex = yyLexBuf2;
552 yyLex[0] = '\0';
553 yyLexLen = 0;
554 yyPreprocessorSkipping.push(false);
555 yyNumPreprocessorSkipping = 0;
556 yyBraceDepth = 0;
557 yyParenDepth = 0;
558 yyBracketDepth = 0;
559 yyCh = '\0';
560 parsingMacro = false;
561}
562
563void Tokenizer::start(const Location& loc)
564{
565 yyTokLoc = loc;
566 yyCurLoc = loc;
567 yyCurLoc.start();
568 strcpy(yyPrevLex, "beginning-of-input");
569 strcpy(yyLex, "beginning-of-input");
570 yyLexLen = strlen(yyLex);
571 yyBraceDepth = 0;
572 yyParenDepth = 0;
573 yyBracketDepth = 0;
574 yyCh = '\0';
575 yyCh = getChar();
576}
577
578/*
579 Returns the next token, if # was met. This function interprets the
580 preprocessor directive, skips over any #ifdef'd out tokens, and returns the
581 token after all of that.
582*/
583int Tokenizer::getTokenAfterPreprocessor()
584{
585 yyCh = getChar();
586 while (isspace(yyCh) && yyCh != '\n')
587 yyCh = getChar();
588
589 /*
590 #directive condition
591 */
592 QString directive;
593 QString condition;
594
595 while (isalpha(yyCh)) {
596 directive += QChar(yyCh);
597 yyCh = getChar();
598 }
599 if (!directive.isEmpty()) {
600 while (yyCh != EOF && yyCh != '\n') {
601 if (yyCh == '\\')
602 yyCh = getChar();
603 condition += yyCh;
604 yyCh = getChar();
605 }
606 condition.replace(*comment, "");
607 condition = condition.simplified();
608
609 /*
610 The #if, #ifdef, #ifndef, #elif, #else, and #endif
611 directives have an effect on the skipping stack. For
612 instance, if the code processed so far is
613
614 #if 1
615 #if 0
616 #if 1
617 // ...
618 #else
619
620 the skipping stack contains, from bottom to top, false true
621 true (assuming 0 is false and 1 is true). If at least one
622 entry of the stack is true, the tokens are skipped.
623
624 This mechanism is simple yet hard to understand.
625 */
626 if (directive[0] == QChar('i')) {
627 if (directive == QString("if"))
628 pushSkipping(!isTrue(condition));
629 else if (directive == QString("ifdef"))
630 pushSkipping(!defines->exactMatch(condition));
631 else if (directive == QString("ifndef"))
632 pushSkipping(defines->exactMatch(condition));
633 } else if (directive[0] == QChar('e')) {
634 if (directive == QString("elif")) {
635 bool old = popSkipping();
636 if (old)
637 pushSkipping(!isTrue(condition));
638 else
639 pushSkipping(true);
640 } else if (directive == QString("else")) {
641 pushSkipping(!popSkipping());
642 } else if (directive == QString("endif")) {
643 popSkipping();
644 }
645 } else if (directive == QString("define")) {
646 if (versionX->exactMatch(condition))
647 yyVersion = versionX->cap(1);
648 }
649 }
650
651 int tok;
652 do {
653 /*
654 We set yyLex now, and after getToken() this will be
655 yyPrevLex. This way, we skip over the preprocessor
656 directive.
657 */
658 qstrcpy(yyLex, yyPrevLex);
659
660 /*
661 If getToken() meets another #, it will call
662 getTokenAfterPreprocessor() once again, which could in turn
663 call getToken() again, etc. Unless there are 10,000 or so
664 preprocessor directives in a row, this shouldn't overflow
665 the stack.
666 */
667 tok = getToken();
668 } while (yyNumPreprocessorSkipping > 0);
669 return tok;
670}
671
672/*
673 Pushes a new skipping value onto the stack. This corresponds to entering a
674 new #if block.
675*/
676void Tokenizer::pushSkipping(bool skip)
677{
678 yyPreprocessorSkipping.push(skip);
679 if (skip)
680 yyNumPreprocessorSkipping++;
681}
682
683/*
684 Pops a skipping value from the stack. This corresponds to reaching a #endif.
685*/
686bool Tokenizer::popSkipping()
687{
688 if (yyPreprocessorSkipping.isEmpty()) {
689 yyTokLoc.warning(tr("Unexpected #elif, #else or #endif"));
690 return true;
691 }
692
693 bool skip = yyPreprocessorSkipping.pop();
694 if (skip)
695 yyNumPreprocessorSkipping--;
696 return skip;
697}
698
699/*
700 Returns true if the condition evaluates as true, otherwise false. The
701 condition is represented by a string. Unsophisticated parsing techniques are
702 used. The preprocessing method could be named StriNg-Oriented PreProcessing,
703 as SNOBOL stands for StriNg-Oriented symBOlic Language.
704*/
705bool Tokenizer::isTrue(const QString &condition)
706{
707 int firstOr = -1;
708 int firstAnd = -1;
709 int parenDepth = 0;
710
711 /*
712 Find the first logical operator at top level, but be careful
713 about precedence. Examples:
714
715 X || Y // the or
716 X || Y || Z // the leftmost or
717 X || Y && Z // the or
718 X && Y || Z // the or
719 (X || Y) && Z // the and
720 */
721 for (int i = 0; i < (int) condition.length() - 1; i++) {
722 QChar ch = condition[i];
723 if (ch == QChar('(')) {
724 parenDepth++;
725 } else if (ch == QChar(')')) {
726 parenDepth--;
727 } else if (parenDepth == 0) {
728 if (condition[i + 1] == ch) {
729 if (ch == QChar('|')) {
730 firstOr = i;
731 break;
732 } else if (ch == QChar('&')) {
733 if (firstAnd == -1)
734 firstAnd = i;
735 }
736 }
737 }
738 }
739 if (firstOr != -1)
740 return isTrue(condition.left(firstOr)) ||
741 isTrue(condition.mid(firstOr + 2));
742 if (firstAnd != -1)
743 return isTrue(condition.left(firstAnd)) &&
744 isTrue(condition.mid(firstAnd + 2));
745
746 QString t = condition.simplified();
747 if (t.isEmpty())
748 return true;
749
750 if (t[0] == QChar('!'))
751 return !isTrue(t.mid(1));
752 if (t[0] == QChar('(') && t.right(1)[0] == QChar(')'))
753 return isTrue(t.mid(1, t.length() - 2));
754
755 if (definedX->exactMatch(t))
756 return defines->exactMatch(definedX->cap(1));
757 else
758 return !falsehoods->exactMatch(t);
759}
760
761QString Tokenizer::lexeme() const
762{
763 return sourceCodec->toUnicode(yyLex);
764}
765
766QString Tokenizer::previousLexeme() const
767{
768 return sourceCodec->toUnicode(yyPrevLex);
769}
770
771QT_END_NAMESPACE
772

Warning: That file was not part of the compilation database. It may have many parsing errors.