1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the Qt Linguist of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU
19** General Public License version 3 as published by the Free Software
20** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21** included in the packaging of this file. Please review the following
22** information to ensure the GNU General Public License requirements will
23** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24**
25** $QT_END_LICENSE$
26**
27****************************************************************************/
28
29#include "lupdate.h"
30
31#include <translator.h>
32
33#include <QtCore/QDebug>
34#include <QtCore/QFile>
35#include <QtCore/QRegExp>
36#include <QtCore/QStack>
37#include <QtCore/QStack>
38#include <QtCore/QString>
39#include <QtCore/QTextCodec>
40#include <QtCore/QCoreApplication>
41
42#include <iostream>
43
44#include <ctype.h>
45
46QT_BEGIN_NAMESPACE
47
// Token codes returned by the tokenizer. The enumerators rely on their
// implicit values, so their order must not change.
enum {
    // End of input.
    Tok_Eof,

    // Keywords and specially recognised identifiers.
    Tok_class,
    Tok_return,
    Tok_tr,
    Tok_translate,
    Tok_Ident,
    Tok_Package,

    // Comments and string literals.
    Tok_Comment,
    Tok_String,

    // Punctuation.
    Tok_Colon,
    Tok_Dot,
    Tok_LeftBrace,
    Tok_RightBrace,
    Tok_LeftParen,
    Tok_RightParen,
    Tok_Comma,
    Tok_Semicolon,

    // Numbers, '+' operators and the 'null' keyword.
    Tok_Integer,
    Tok_Plus,
    Tok_PlusPlus,
    Tok_PlusEq,
    Tok_null
};
54
55class Scope
56{
57 public:
58 QString name;
59 enum Type {Clazz, Function, Other} type;
60 int line;
61
62 Scope(const QString & name, Type type, int line) :
63 name(name),
64 type(type),
65 line(line)
66 {}
67
68 ~Scope()
69 {}
70};
71
72/*
73 The tokenizer maintains the following global variables. The names
74 should be self-explanatory.
75*/
76
77static QString yyFileName;
78static QChar yyCh;
79static QString yyIdent;
80static QString yyComment;
81static QString yyString;
82
83
84static qlonglong yyInteger;
85static int yyParenDepth;
86static int yyLineNo;
87static int yyCurLineNo;
88static int yyParenLineNo;
89static int yyTok;
90
91// the string to read from and current position in the string
92static QString yyInStr;
93static int yyInPos;
94
95// The parser maintains the following global variables.
96static QString yyPackage;
97static QStack<Scope*> yyScope;
98
99std::ostream &yyMsg(int line = 0)
100{
101 return std::cerr << qPrintable(yyFileName) << ':' << (line ? line : yyLineNo) << ": ";
102}
103
104static QChar getChar()
105{
106 if (yyInPos >= yyInStr.size())
107 return QChar(EOF);
108 QChar c = yyInStr[yyInPos++];
109 if (c == QLatin1Char('\n'))
110 ++yyCurLineNo;
111 return c;
112}
113
114static int getToken()
115{
116 const char tab[] = "bfnrt\"\'\\";
117 const char backTab[] = "\b\f\n\r\t\"\'\\";
118
119 yyIdent.clear();
120 yyComment.clear();
121 yyString.clear();
122
123 while (yyCh != QChar(EOF)) {
124 yyLineNo = yyCurLineNo;
125
126 if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) {
127 do {
128 yyIdent.append(c: yyCh);
129 yyCh = getChar();
130 } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' );
131
132 if (yyTok != Tok_Dot) {
133 switch ( yyIdent.at(i: 0).toLatin1() ) {
134 case 'r':
135 if ( yyIdent == QLatin1String("return") )
136 return Tok_return;
137 break;
138 case 'c':
139 if ( yyIdent == QLatin1String("class") )
140 return Tok_class;
141 break;
142 case 'n':
143 if ( yyIdent == QLatin1String("null") )
144 return Tok_null;
145 break;
146 }
147 }
148 switch ( yyIdent.at(i: 0).toLatin1() ) {
149 case 'p':
150 if( yyIdent == QLatin1String("package") )
151 return Tok_Package;
152 break;
153 case 't':
154 if ( yyIdent == QLatin1String("tr") )
155 return Tok_tr;
156 if ( yyIdent == QLatin1String("translate") )
157 return Tok_translate;
158 }
159 return Tok_Ident;
160 } else {
161 switch ( yyCh.toLatin1() ) {
162
163 case '/':
164 yyCh = getChar();
165 if ( yyCh == QLatin1Char('/') ) {
166 do {
167 yyCh = getChar();
168 if (yyCh == QChar(EOF))
169 break;
170 yyComment.append(c: yyCh);
171 } while (yyCh != QLatin1Char('\n'));
172 return Tok_Comment;
173
174 } else if ( yyCh == QLatin1Char('*') ) {
175 bool metAster = false;
176 bool metAsterSlash = false;
177
178 while ( !metAsterSlash ) {
179 yyCh = getChar();
180 if (yyCh == QChar(EOF)) {
181 yyMsg() << qPrintable(LU::tr("Unterminated Java comment.\n"));
182 return Tok_Comment;
183 }
184
185 yyComment.append( c: yyCh );
186
187 if ( yyCh == QLatin1Char('*') )
188 metAster = true;
189 else if ( metAster && yyCh == QLatin1Char('/') )
190 metAsterSlash = true;
191 else
192 metAster = false;
193 }
194 yyComment.chop(n: 2);
195 yyCh = getChar();
196
197 return Tok_Comment;
198 }
199 break;
200 case '"':
201 yyCh = getChar();
202
203 while (yyCh != QChar(EOF) && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"')) {
204 if ( yyCh == QLatin1Char('\\') ) {
205 yyCh = getChar();
206 if ( yyCh == QLatin1Char('u') ) {
207 yyCh = getChar();
208 uint unicode(0);
209 for (int i = 4; i > 0; --i) {
210 unicode = unicode << 4;
211 if( yyCh.isDigit() ) {
212 unicode += yyCh.digitValue();
213 }
214 else {
215 int sub(yyCh.toLower().toLatin1() - 87);
216 if( sub > 15 || sub < 10) {
217 yyMsg() << qPrintable(LU::tr("Invalid Unicode value.\n"));
218 break;
219 }
220 unicode += sub;
221 }
222 yyCh = getChar();
223 }
224 yyString.append(c: QChar(unicode));
225 }
226 else if ( yyCh == QLatin1Char('\n') ) {
227 yyCh = getChar();
228 }
229 else {
230 yyString.append( c: QLatin1Char(backTab[strchr( s: tab, c: yyCh.toLatin1() ) - tab]) );
231 yyCh = getChar();
232 }
233 } else {
234 yyString.append(c: yyCh);
235 yyCh = getChar();
236 }
237 }
238
239 if ( yyCh != QLatin1Char('"') )
240 yyMsg() << qPrintable(LU::tr("Unterminated string.\n"));
241
242 yyCh = getChar();
243
244 return Tok_String;
245
246 case ':':
247 yyCh = getChar();
248 return Tok_Colon;
249 case '\'':
250 yyCh = getChar();
251
252 if ( yyCh == QLatin1Char('\\') )
253 yyCh = getChar();
254 do {
255 yyCh = getChar();
256 } while (yyCh != QChar(EOF) && yyCh != QLatin1Char('\''));
257 yyCh = getChar();
258 break;
259 case '{':
260 yyCh = getChar();
261 return Tok_LeftBrace;
262 case '}':
263 yyCh = getChar();
264 return Tok_RightBrace;
265 case '(':
266 if (yyParenDepth == 0)
267 yyParenLineNo = yyCurLineNo;
268 yyParenDepth++;
269 yyCh = getChar();
270 return Tok_LeftParen;
271 case ')':
272 if (yyParenDepth == 0)
273 yyParenLineNo = yyCurLineNo;
274 yyParenDepth--;
275 yyCh = getChar();
276 return Tok_RightParen;
277 case ',':
278 yyCh = getChar();
279 return Tok_Comma;
280 case '.':
281 yyCh = getChar();
282 return Tok_Dot;
283 case ';':
284 yyCh = getChar();
285 return Tok_Semicolon;
286 case '+':
287 yyCh = getChar();
288 if (yyCh == QLatin1Char('+')) {
289 yyCh = getChar();
290 return Tok_PlusPlus;
291 }
292 if( yyCh == QLatin1Char('=') ){
293 yyCh = getChar();
294 return Tok_PlusEq;
295 }
296 return Tok_Plus;
297 case '0':
298 case '1':
299 case '2':
300 case '3':
301 case '4':
302 case '5':
303 case '6':
304 case '7':
305 case '8':
306 case '9':
307 {
308 QByteArray ba;
309 ba += yyCh.toLatin1();
310 yyCh = getChar();
311 bool hex = yyCh == QLatin1Char('x');
312 if ( hex ) {
313 ba += yyCh.toLatin1();
314 yyCh = getChar();
315 }
316 while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) {
317 ba += yyCh.toLatin1();
318 yyCh = getChar();
319 }
320 bool ok;
321 yyInteger = ba.toLongLong(ok: &ok);
322 if (ok) return Tok_Integer;
323 break;
324 }
325 default:
326 yyCh = getChar();
327 }
328 }
329 }
330 return Tok_Eof;
331}
332
333static bool match( int t )
334{
335 bool matches = ( yyTok == t );
336 if ( matches )
337 yyTok = getToken();
338 return matches;
339}
340
341static bool matchString( QString &s )
342{
343 if ( yyTok != Tok_String )
344 return false;
345
346 s = yyString;
347 yyTok = getToken();
348 while ( yyTok == Tok_Plus ) {
349 yyTok = getToken();
350 if (yyTok == Tok_String)
351 s += yyString;
352 else {
353 yyMsg() << qPrintable(LU::tr(
354 "String used in translation can contain only literals"
355 " concatenated with other literals, not expressions or numbers.\n"));
356 return false;
357 }
358 yyTok = getToken();
359 }
360 return true;
361}
362
363static bool matchStringOrNull(QString &s)
364{
365 bool matches = matchString(s);
366 if (!matches) {
367 matches = (yyTok == Tok_null);
368 if (matches)
369 yyTok = getToken();
370 }
371 return matches;
372}
373
374/*
375 * match any expression that can return a number, which can be
376 * 1. Literal number (e.g. '11')
377 * 2. simple identifier (e.g. 'm_count')
378 * 3. simple function call (e.g. 'size()' )
379 * 4. function call on an object (e.g. 'list.size()')
380 * 5. function call on an object (e.g. 'list->size()')
381 *
382 * Other cases:
383 * size(2,4)
384 * list().size()
385 * list(a,b).size(2,4)
386 * etc...
387 */
388static bool matchExpression()
389{
390 if (match(t: Tok_Integer)) {
391 return true;
392 }
393
394 int parenlevel = 0;
395 while (match(t: Tok_Ident) || parenlevel > 0) {
396 if (yyTok == Tok_RightParen) {
397 if (parenlevel == 0) break;
398 --parenlevel;
399 yyTok = getToken();
400 } else if (yyTok == Tok_LeftParen) {
401 yyTok = getToken();
402 if (yyTok == Tok_RightParen) {
403 yyTok = getToken();
404 } else {
405 ++parenlevel;
406 }
407 } else if (yyTok == Tok_Ident) {
408 continue;
409 } else if (parenlevel == 0) {
410 return false;
411 }
412 }
413 return true;
414}
415
416static const QString context()
417{
418 QString context(yyPackage);
419 bool innerClass = false;
420 for (int i = 0; i < yyScope.size(); ++i) {
421 if (yyScope.at(i)->type == Scope::Clazz) {
422 if (innerClass)
423 context.append(s: QLatin1String("$"));
424 else
425 context.append(s: QLatin1String("."));
426
427 context.append(s: yyScope.at(i)->name);
428 innerClass = true;
429 }
430 }
431 return context;
432}
433
434static void recordMessage(
435 Translator *tor, const QString &context, const QString &text, const QString &comment,
436 const QString &extracomment, bool plural, ConversionData &cd)
437{
438 TranslatorMessage msg(
439 context, text, comment, QString(),
440 yyFileName, yyLineNo, QStringList(),
441 TranslatorMessage::Unfinished, plural);
442 msg.setExtraComment(extracomment.simplified());
443 tor->extend(msg, cd);
444}
445
446static void parse(Translator *tor, ConversionData &cd)
447{
448 QString text;
449 QString com;
450 QString extracomment;
451
452 yyCh = getChar();
453
454 yyTok = getToken();
455 while ( yyTok != Tok_Eof ) {
456 switch ( yyTok ) {
457 case Tok_class:
458 yyTok = getToken();
459 if(yyTok == Tok_Ident) {
460 yyScope.push(t: new Scope(yyIdent, Scope::Clazz, yyLineNo));
461 }
462 else {
463 yyMsg() << qPrintable(LU::tr("'class' must be followed by a class name.\n"));
464 break;
465 }
466 while (!match(t: Tok_LeftBrace)) {
467 yyTok = getToken();
468 }
469 break;
470
471 case Tok_tr:
472 yyTok = getToken();
473 if ( match(t: Tok_LeftParen) && matchString(s&: text) ) {
474 com.clear();
475 bool plural = false;
476
477 if ( match(t: Tok_RightParen) ) {
478 // no comment
479 } else if (match(t: Tok_Comma) && matchStringOrNull(s&: com)) { //comment
480 if ( match(t: Tok_RightParen)) {
481 // ok,
482 } else if (match(t: Tok_Comma)) {
483 plural = true;
484 }
485 }
486 if (!text.isEmpty())
487 recordMessage(tor, context: context(), text, comment: com, extracomment, plural, cd);
488 }
489 break;
490 case Tok_translate:
491 {
492 QString contextOverride;
493 yyTok = getToken();
494 if ( match(t: Tok_LeftParen) &&
495 matchString(s&: contextOverride) &&
496 match(t: Tok_Comma) &&
497 matchString(s&: text) ) {
498
499 com.clear();
500 bool plural = false;
501 if (!match(t: Tok_RightParen)) {
502 // look for comment
503 if ( match(t: Tok_Comma) && matchStringOrNull(s&: com)) {
504 if (!match(t: Tok_RightParen)) {
505 if (match(t: Tok_Comma) && matchExpression() && match(t: Tok_RightParen)) {
506 plural = true;
507 } else {
508 break;
509 }
510 }
511 } else {
512 break;
513 }
514 }
515 if (!text.isEmpty())
516 recordMessage(tor, context: contextOverride, text, comment: com, extracomment, plural, cd);
517 }
518 }
519 break;
520
521 case Tok_Ident:
522 yyTok = getToken();
523 break;
524
525 case Tok_Comment:
526 if (yyComment.startsWith(c: QLatin1Char(':'))) {
527 yyComment.remove(i: 0, len: 1);
528 extracomment.append(s: yyComment);
529 }
530 yyTok = getToken();
531 break;
532
533 case Tok_RightBrace:
534 if ( yyScope.isEmpty() ) {
535 yyMsg() << qPrintable(LU::tr("Excess closing brace.\n"));
536 }
537 else
538 delete (yyScope.pop());
539 extracomment.clear();
540 yyTok = getToken();
541 break;
542
543 case Tok_LeftBrace:
544 yyScope.push(t: new Scope(QString(), Scope::Other, yyLineNo));
545 yyTok = getToken();
546 break;
547
548 case Tok_Semicolon:
549 extracomment.clear();
550 yyTok = getToken();
551 break;
552
553 case Tok_Package:
554 yyTok = getToken();
555 while(!match(t: Tok_Semicolon)) {
556 switch(yyTok) {
557 case Tok_Ident:
558 yyPackage.append(s: yyIdent);
559 break;
560 case Tok_Dot:
561 yyPackage.append(s: QLatin1String("."));
562 break;
563 default:
564 yyMsg() << qPrintable(LU::tr("'package' must be followed by package name.\n"));
565 break;
566 }
567 yyTok = getToken();
568 }
569 break;
570
571 default:
572 yyTok = getToken();
573 }
574 }
575
576 if ( !yyScope.isEmpty() )
577 yyMsg(line: yyScope.top()->line) << qPrintable(LU::tr("Unbalanced opening brace.\n"));
578 else if ( yyParenDepth != 0 )
579 yyMsg(line: yyParenLineNo) << qPrintable(LU::tr("Unbalanced opening parenthesis.\n"));
580}
581
582
583bool loadJava(Translator &translator, const QString &filename, ConversionData &cd)
584{
585 QFile file(filename);
586 if (!file.open(flags: QIODevice::ReadOnly)) {
587 cd.appendError(error: LU::tr(sourceText: "Cannot open %1: %2").arg(args: filename, args: file.errorString()));
588 return false;
589 }
590
591 yyInPos = -1;
592 yyFileName = filename;
593 yyPackage.clear();
594 yyScope.clear();
595 yyTok = -1;
596 yyParenDepth = 0;
597 yyCurLineNo = 0;
598 yyParenLineNo = 1;
599
600 QTextStream ts(&file);
601 ts.setCodec(QTextCodec::codecForName(name: cd.m_sourceIsUtf16 ? "UTF-16" : "UTF-8"));
602 ts.setAutoDetectUnicode(true);
603 yyInStr = ts.readAll();
604 yyInPos = 0;
605 yyFileName = filename;
606 yyCurLineNo = 1;
607 yyParenLineNo = 1;
608
609 parse(tor: &translator, cd);
610 return true;
611}
612
613QT_END_NAMESPACE
614

// Source: qttools/src/linguist/lupdate/java.cpp