1 | // -*- c-basic-offset: 2 -*- |
2 | /* |
3 | * This file is part of the KDE libraries |
4 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
5 | * Copyright (C) 2006 Apple Computer, Inc. |
6 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) |
7 | * |
8 | * This library is free software; you can redistribute it and/or |
9 | * modify it under the terms of the GNU Library General Public |
10 | * License as published by the Free Software Foundation; either |
11 | * version 2 of the License, or (at your option) any later version. |
12 | * |
13 | * This library is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | * Library General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU Library General Public License |
19 | * along with this library; see the file COPYING.LIB. If not, write to |
20 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
21 | * Boston, MA 02110-1301, USA. |
22 | * |
23 | */ |
24 | |
25 | #include "lexer.h" |
26 | #include <config-kjs.h> |
27 | #include <string.h> |
28 | #include <limits.h> |
29 | |
30 | #include "dtoa.h" |
31 | #include "function.h" |
32 | #include "interpreter.h" |
33 | #include "nodes.h" |
34 | #include "commonunicode.h" |
35 | #include "wtf/ASCIICType.h" |
36 | #include "wtf/DisallowCType.h" |
37 | #include <wtf/unicode/libc/UnicodeLibC.h> |
38 | |
39 | using namespace WTF; |
40 | using namespace Unicode; |
41 | |
42 | // GCC cstring uses these automatically, but not all implementations do. |
43 | using std::strlen; |
44 | using std::strcpy; |
45 | using std::strncpy; |
46 | using std::memset; |
47 | using std::memcpy; |
48 | |
49 | // we can't specify the namespace in yacc's C output, so do it here |
50 | using namespace KJS; |
51 | |
52 | #ifndef KDE_USE_FINAL |
53 | #include "grammar.h" |
54 | #endif |
55 | |
56 | #include "lookup.h" |
57 | #include "lexer.lut.h" |
58 | |
59 | extern YYLTYPE kjsyylloc; // global bison variable holding token info |
60 | |
61 | // a bridge for yacc from the C world to C++ |
62 | int kjsyylex() |
63 | { |
64 | return lexer().lex(); |
65 | } |
66 | |
67 | namespace KJS { |
68 | |
// Forward declaration; the definition appears further down in this file,
// after its first use inside Lexer::lex().
static bool isDecimalDigit(int c);

// Capacity reserved up front for the 8-bit and 16-bit token read buffers
// (m_buffer8 / m_buffer16).
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables of lexer-owned strings and
// identifiers (m_strings / m_identifiers).
static const size_t initialStringTableCapacity = 64;
73 | |
74 | Lexer& lexer() |
75 | { |
76 | // ASSERT(JSLock::currentThreadIsHoldingLock()); |
77 | |
78 | // FIXME: We'd like to avoid calling new here, but we don't currently |
79 | // support tearing down the Lexer at app quit time, since that would involve |
80 | // tearing down its UString data members without holding the JSLock. |
81 | static Lexer* staticLexer = new Lexer; |
82 | return *staticLexer; |
83 | } |
84 | |
85 | Lexer::Lexer() |
86 | : yylineno(0) |
87 | , restrKeyword(false) |
88 | , eatNextIdentifier(false) |
89 | , stackToken(-1) |
90 | , lastToken(-1) |
91 | , pos(0) |
92 | , code(0) |
93 | , length(0) |
94 | #ifndef KJS_PURE_ECMA |
95 | , bol(true) |
96 | #endif |
97 | , current(0) |
98 | , next1(0) |
99 | , next2(0) |
100 | , next3(0) |
101 | { |
102 | m_buffer8.reserveCapacity(initialReadBufferCapacity); |
103 | m_buffer16.reserveCapacity(initialReadBufferCapacity); |
104 | m_strings.reserveCapacity(initialStringTableCapacity); |
105 | m_identifiers.reserveCapacity(initialStringTableCapacity); |
106 | } |
107 | |
108 | void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len) |
109 | { |
110 | yylineno = startingLineNumber; |
111 | m_sourceURL = sourceURL; |
112 | restrKeyword = false; |
113 | delimited = false; |
114 | eatNextIdentifier = false; |
115 | stackToken = -1; |
116 | lastToken = -1; |
117 | pos = 0; |
118 | code = c; |
119 | length = len; |
120 | skipLF = false; |
121 | skipCR = false; |
122 | error = false; |
123 | #ifndef KJS_PURE_ECMA |
124 | bol = true; |
125 | #endif |
126 | |
127 | // read first characters |
128 | current = (length > 0) ? code[0].uc : -1; |
129 | next1 = (length > 1) ? code[1].uc : -1; |
130 | next2 = (length > 2) ? code[2].uc : -1; |
131 | next3 = (length > 3) ? code[3].uc : -1; |
132 | } |
133 | |
134 | void Lexer::shift(unsigned int p) |
135 | { |
136 | // Here would be a good place to strip Cf characters, but that has caused compatibility problems: |
137 | // <http://bugs.webkit.org/show_bug.cgi?id=10183>. |
138 | while (p--) { |
139 | current = next1; |
140 | next1 = next2; |
141 | next2 = next3; |
142 | pos++; |
143 | next3 = (pos + 3 < length) ? code[pos + 3].uc : -1; |
144 | } |
145 | } |
146 | |
147 | // called on each new line |
148 | void Lexer::nextLine() |
149 | { |
150 | yylineno++; |
151 | #ifndef KJS_PURE_ECMA |
152 | bol = true; |
153 | #endif |
154 | } |
155 | |
156 | void Lexer::setDone(State s) |
157 | { |
158 | state = s; |
159 | done = true; |
160 | } |
161 | |
162 | int Lexer::lex() |
163 | { |
164 | int token = 0; |
165 | state = Start; |
166 | unsigned short stringType = 0; // either single or double quotes |
167 | m_buffer8.clear(); |
168 | m_buffer16.clear(); |
169 | done = false; |
170 | terminator = false; |
171 | skipLF = false; |
172 | skipCR = false; |
173 | |
174 | // did we push a token on the stack previously ? |
175 | // (after an automatic semicolon insertion) |
176 | if (stackToken >= 0) { |
177 | setDone(Other); |
178 | token = stackToken; |
179 | stackToken = 0; |
180 | } |
181 | |
182 | while (!done) { |
183 | if (skipLF && current != '\n') // found \r but not \n afterwards |
184 | skipLF = false; |
185 | if (skipCR && current != '\r') // found \n but not \r afterwards |
186 | skipCR = false; |
187 | if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one |
188 | { |
189 | skipLF = false; |
190 | skipCR = false; |
191 | shift(1); |
192 | } |
193 | switch (state) { |
194 | case Start: |
195 | if (isWhiteSpace()) { |
196 | // do nothing |
197 | } else if (current == '/' && next1 == '/') { |
198 | shift(1); |
199 | state = InSingleLineComment; |
200 | } else if (current == '/' && next1 == '*') { |
201 | shift(1); |
202 | state = InMultiLineComment; |
203 | } else if (current == -1) { |
204 | if (!terminator && !delimited) { |
205 | // automatic semicolon insertion if program incomplete |
206 | token = ';'; |
207 | stackToken = 0; |
208 | setDone(Other); |
209 | } else |
210 | setDone(Eof); |
211 | } else if (isLineTerminator()) { |
212 | nextLine(); |
213 | terminator = true; |
214 | if (restrKeyword) { |
215 | token = ';'; |
216 | setDone(Other); |
217 | } |
218 | } else if (current == '"' || current == '\'') { |
219 | state = InString; |
220 | stringType = static_cast<unsigned short>(current); |
221 | } else if (isIdentStart(current)) { |
222 | record16(current); |
223 | state = InIdentifierOrKeyword; |
224 | } else if (current == '\\') { |
225 | state = InIdentifierStartUnicodeEscapeStart; |
226 | } else if (current == '0') { |
227 | record8(current); |
228 | state = InNum0; |
229 | } else if (isDecimalDigit(current)) { |
230 | record8(current); |
231 | state = InNum; |
232 | } else if (current == '.' && isDecimalDigit(next1)) { |
233 | record8(current); |
234 | state = InDecimal; |
235 | #ifndef KJS_PURE_ECMA |
236 | // <!-- marks the beginning of a line comment (for www usage) |
237 | } else if (current == '<' && next1 == '!' && |
238 | next2 == '-' && next3 == '-') { |
239 | shift(3); |
240 | state = InSingleLineComment; |
241 | // same for --> |
242 | } else if (bol && current == '-' && next1 == '-' && next2 == '>') { |
243 | shift(2); |
244 | state = InSingleLineComment; |
245 | #endif |
246 | } else { |
247 | token = matchPunctuator(current, next1, next2, next3); |
248 | if (token != -1) { |
249 | setDone(Other); |
250 | } else { |
251 | // cerr << "encountered unknown character" << endl; |
252 | setDone(Bad); |
253 | } |
254 | } |
255 | break; |
256 | case InString: |
257 | switch (current) { |
258 | case '\'': |
259 | case '"': |
260 | if (current == stringType) { |
261 | shift(1); |
262 | setDone(String); |
263 | } else { |
264 | record16(current); |
265 | } |
266 | break; |
267 | case '\\': |
268 | state = InEscapeSequence; |
269 | break; |
270 | case '\n': |
271 | case '\r': |
272 | case 0x2028: |
273 | case 0x2029: |
274 | case -1: |
275 | // encountered newline or eof |
276 | setDone(Bad); |
277 | break; |
278 | default: |
279 | record16(current); |
280 | break; |
281 | } |
282 | break; |
283 | // Escape Sequences inside of strings |
284 | case InEscapeSequence: |
285 | if (isOctalDigit(current)) { |
286 | if (current >= '0' && current <= '3' && |
287 | isOctalDigit(next1) && isOctalDigit(next2)) { |
288 | record16(convertOctal(current, next1, next2)); |
289 | shift(2); |
290 | state = InString; |
291 | } else if (isOctalDigit(current) && isOctalDigit(next1)) { |
292 | record16(convertOctal('0', current, next1)); |
293 | shift(1); |
294 | state = InString; |
295 | } else if (isOctalDigit(current)) { |
296 | record16(convertOctal('0', '0', current)); |
297 | state = InString; |
298 | } else { |
299 | setDone(Bad); |
300 | } |
301 | } else if (current == 'x') |
302 | state = InHexEscape; |
303 | else if (current == 'u') |
304 | state = InUnicodeEscape; |
305 | else if (isLineTerminator()) { |
306 | nextLine(); |
307 | state = InString; |
308 | } else { |
309 | record16(singleEscape(static_cast<unsigned short>(current))); |
310 | state = InString; |
311 | } |
312 | break; |
313 | case InHexEscape: |
314 | if (isHexDigit(current) && isHexDigit(next1)) { |
315 | state = InString; |
316 | record16(convertHex(current, next1)); |
317 | shift(1); |
318 | } else { |
319 | setDone(Bad); |
320 | } |
321 | break; |
322 | case InUnicodeEscape: |
323 | if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { |
324 | record16(convertUnicode(current, next1, next2, next3)); |
325 | shift(3); |
326 | state = InString; |
327 | } else if (current == stringType) { |
328 | record16('u'); |
329 | shift(1); |
330 | setDone(String); |
331 | } else { |
332 | setDone(Bad); |
333 | } |
334 | break; |
335 | case InSingleLineComment: |
336 | if (isLineTerminator()) { |
337 | nextLine(); |
338 | terminator = true; |
339 | if (restrKeyword) { |
340 | token = ';'; |
341 | setDone(Other); |
342 | } else |
343 | state = Start; |
344 | } else if (current == -1) { |
345 | setDone(Eof); |
346 | } |
347 | break; |
348 | case InMultiLineComment: |
349 | if (current == -1) { |
350 | setDone(Bad); |
351 | } else if (isLineTerminator()) { |
352 | nextLine(); |
353 | } else if (current == '*' && next1 == '/') { |
354 | state = Start; |
355 | shift(1); |
356 | } |
357 | break; |
358 | case InIdentifierOrKeyword: |
359 | case InIdentifier: |
360 | if (isIdentPart(current)) |
361 | record16(current); |
362 | else if (current == '\\') |
363 | state = InIdentifierPartUnicodeEscapeStart; |
364 | else |
365 | setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier); |
366 | break; |
367 | case InNum0: |
368 | if (current == 'x' || current == 'X') { |
369 | record8(current); |
370 | state = InHex; |
371 | } else if (current == '.') { |
372 | record8(current); |
373 | state = InDecimal; |
374 | } else if (current == 'e' || current == 'E') { |
375 | record8(current); |
376 | state = InExponentIndicator; |
377 | } else if (isOctalDigit(current)) { |
378 | record8(current); |
379 | state = InOctal; |
380 | } else if (isDecimalDigit(current)) { |
381 | record8(current); |
382 | state = InDecimal; |
383 | } else { |
384 | setDone(Number); |
385 | } |
386 | break; |
387 | case InHex: |
388 | if (isHexDigit(current)) { |
389 | record8(current); |
390 | } else { |
391 | setDone(Hex); |
392 | } |
393 | break; |
394 | case InOctal: |
395 | if (isOctalDigit(current)) { |
396 | record8(current); |
397 | } |
398 | else if (isDecimalDigit(current)) { |
399 | record8(current); |
400 | state = InDecimal; |
401 | } else |
402 | setDone(Octal); |
403 | break; |
404 | case InNum: |
405 | if (isDecimalDigit(current)) { |
406 | record8(current); |
407 | } else if (current == '.') { |
408 | record8(current); |
409 | state = InDecimal; |
410 | } else if (current == 'e' || current == 'E') { |
411 | record8(current); |
412 | state = InExponentIndicator; |
413 | } else |
414 | setDone(Number); |
415 | break; |
416 | case InDecimal: |
417 | if (isDecimalDigit(current)) { |
418 | record8(current); |
419 | } else if (current == 'e' || current == 'E') { |
420 | record8(current); |
421 | state = InExponentIndicator; |
422 | } else |
423 | setDone(Number); |
424 | break; |
425 | case InExponentIndicator: |
426 | if (current == '+' || current == '-') { |
427 | record8(current); |
428 | } else if (isDecimalDigit(current)) { |
429 | record8(current); |
430 | state = InExponent; |
431 | } else |
432 | setDone(Bad); |
433 | break; |
434 | case InExponent: |
435 | if (isDecimalDigit(current)) { |
436 | record8(current); |
437 | } else |
438 | setDone(Number); |
439 | break; |
440 | case InIdentifierStartUnicodeEscapeStart: |
441 | if (current == 'u') |
442 | state = InIdentifierStartUnicodeEscape; |
443 | else |
444 | setDone(Bad); |
445 | break; |
446 | case InIdentifierPartUnicodeEscapeStart: |
447 | if (current == 'u') |
448 | state = InIdentifierPartUnicodeEscape; |
449 | else |
450 | setDone(Bad); |
451 | break; |
452 | case InIdentifierStartUnicodeEscape: |
453 | if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { |
454 | setDone(Bad); |
455 | break; |
456 | } |
457 | token = convertUnicode(current, next1, next2, next3).uc; |
458 | shift(3); |
459 | if (!isIdentStart(token)) { |
460 | setDone(Bad); |
461 | break; |
462 | } |
463 | record16(token); |
464 | state = InIdentifier; |
465 | break; |
466 | case InIdentifierPartUnicodeEscape: |
467 | if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { |
468 | setDone(Bad); |
469 | break; |
470 | } |
471 | token = convertUnicode(current, next1, next2, next3).uc; |
472 | shift(3); |
473 | if (!isIdentPart(token)) { |
474 | setDone(Bad); |
475 | break; |
476 | } |
477 | record16(token); |
478 | state = InIdentifier; |
479 | break; |
480 | default: |
481 | assert(!"Unhandled state in switch statement" ); |
482 | } |
483 | |
484 | // move on to the next character |
485 | if (!done) |
486 | shift(1); |
487 | #ifndef KJS_PURE_ECMA |
488 | if (state != Start && state != InMultiLineComment) |
489 | bol = false; |
490 | #endif |
491 | } |
492 | |
493 | // no identifiers allowed directly after numeric literal, e.g. "3in" is bad |
494 | if ((state == Number || state == Octal || state == Hex) && isIdentStart(current)) |
495 | state = Bad; |
496 | |
497 | // terminate string |
498 | m_buffer8.append('\0'); |
499 | |
500 | #ifdef KJS_DEBUG_LEX |
501 | fprintf(stderr, "line: %d " , lineNo()); |
502 | fprintf(stderr, "yytext (%x): " , m_buffer8[0]); |
503 | fprintf(stderr, "%s " , m_buffer8.data()); |
504 | #endif |
505 | |
506 | double dval = 0; |
507 | if (state == Number) { |
508 | dval = kjs_strtod(m_buffer8.data(), 0L); |
509 | } else if (state == Hex) { // scan hex numbers |
510 | const char *p = m_buffer8.data() + 2; |
511 | while (char c = *p++) { |
512 | dval *= 16; |
513 | dval += convertHex(c); |
514 | } |
515 | |
516 | if (dval >= mantissaOverflowLowerBound) |
517 | dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16); |
518 | |
519 | state = Number; |
520 | } else if (state == Octal) { // scan octal number |
521 | const char *p = m_buffer8.data() + 1; |
522 | while (char c = *p++) { |
523 | dval *= 8; |
524 | dval += c - '0'; |
525 | } |
526 | |
527 | if (dval >= mantissaOverflowLowerBound) |
528 | dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8); |
529 | |
530 | state = Number; |
531 | } |
532 | |
533 | #ifdef KJS_DEBUG_LEX |
534 | switch (state) { |
535 | case Eof: |
536 | printf("(EOF)\n" ); |
537 | break; |
538 | case Other: |
539 | printf("(Other)\n" ); |
540 | break; |
541 | case Identifier: |
542 | printf("(Identifier)/(Keyword)\n" ); |
543 | break; |
544 | case String: |
545 | printf("(String)\n" ); |
546 | break; |
547 | case Number: |
548 | printf("(Number)\n" ); |
549 | break; |
550 | default: |
551 | printf("(unknown)" ); |
552 | } |
553 | #endif |
554 | |
555 | if (state != Identifier && eatNextIdentifier) |
556 | eatNextIdentifier = false; |
557 | |
558 | restrKeyword = false; |
559 | delimited = false; |
560 | kjsyylloc.first_line = yylineno; // ??? |
561 | kjsyylloc.last_line = yylineno; |
562 | |
563 | switch (state) { |
564 | case Eof: |
565 | token = 0; |
566 | break; |
567 | case Other: |
568 | if(token == '}' || token == ';') { |
569 | delimited = true; |
570 | } |
571 | break; |
572 | case IdentifierOrKeyword: |
573 | if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) { |
574 | case Identifier: |
575 | // Lookup for keyword failed, means this is an identifier |
576 | // Apply anonymous-function hack below (eat the identifier) |
577 | if (eatNextIdentifier) { |
578 | eatNextIdentifier = false; |
579 | token = lex(); |
580 | break; |
581 | } |
582 | kjsyylval.ident = makeIdentifier(m_buffer16); |
583 | token = IDENT; |
584 | break; |
585 | } |
586 | |
587 | eatNextIdentifier = false; |
588 | // Hack for "f = function somename() { ... }", too hard to get into the grammar |
589 | if (token == FUNCTION && lastToken == '=' ) |
590 | eatNextIdentifier = true; |
591 | |
592 | if (token == CONTINUE || token == BREAK || |
593 | token == RETURN || token == THROW) |
594 | restrKeyword = true; |
595 | break; |
596 | case String: |
597 | kjsyylval.ustr = makeUString(m_buffer16); |
598 | token = STRING; |
599 | break; |
600 | case Number: |
601 | kjsyylval.dval = dval; |
602 | token = NUMBER; |
603 | break; |
604 | case Bad: |
605 | #ifdef KJS_DEBUG_LEX |
606 | fprintf(stderr, "KJS: yylex: ERROR.\n" ); |
607 | #endif |
608 | error = true; |
609 | return -1; |
610 | default: |
611 | assert(!"unhandled numeration value in switch" ); |
612 | error = true; |
613 | return -1; |
614 | } |
615 | lastToken = token; |
616 | return token; |
617 | } |
618 | |
619 | bool Lexer::isWhiteSpace() const |
620 | { |
621 | return CommonUnicode::isWhiteSpace(current); |
622 | } |
623 | |
624 | bool Lexer::isLineTerminator() |
625 | { |
626 | bool cr = (current == '\r'); |
627 | bool lf = (current == '\n'); |
628 | if (cr) |
629 | skipLF = true; |
630 | else if (lf) |
631 | skipCR = true; |
632 | return cr || lf || current == 0x2028 || current == 0x2029; |
633 | } |
634 | |
// Signature shared by the pluggable character classification predicates
// (identStart / identPart below).
typedef bool (CharacterCheck)(int c);
636 | |
637 | static bool isIdentStartLibC(int c) |
638 | { |
639 | return (category(c) & (Letter_Uppercase | Letter_Lowercase | |
640 | Letter_Titlecase | Letter_Modifier | Letter_Other)) |
641 | || c == '$' || c == '_'; |
642 | } |
643 | |
644 | static bool isIdentPartLibC(int c) |
645 | { |
646 | return (category(c) & (Letter_Uppercase | Letter_Lowercase | |
647 | Letter_Titlecase | Letter_Modifier | Letter_Other | |
648 | Mark_NonSpacing | Mark_SpacingCombining | |
649 | Number_DecimalDigit | Punctuation_Connector)) |
650 | || c == '$' || c == '_'; |
651 | } |
652 | |
// Currently installed identifier predicates. They default to the LibC based
// implementations above and can be replaced through
// Lexer::setIdentStartChecker() / Lexer::setIdentPartChecker().
static CharacterCheck *identStart = ::isIdentStartLibC;
static CharacterCheck *identPart = ::isIdentPartLibC;
655 | |
656 | void Lexer::setIdentStartChecker(bool (*f)(int c)) |
657 | { |
658 | identStart = f; |
659 | } |
660 | |
661 | void Lexer::setIdentPartChecker(bool (*f)(int c)) |
662 | { |
663 | identPart = f; |
664 | } |
665 | |
666 | bool Lexer::isIdentStart(int c) |
667 | { |
668 | return (*identStart)(c); |
669 | } |
670 | |
671 | bool Lexer::isIdentPart(int c) |
672 | { |
673 | return (*identPart)(c); |
674 | } |
675 | |
// True for the ASCII digits '0' through '9', false for everything else
// (including the -1 end-of-input marker).
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
680 | |
681 | bool Lexer::isHexDigit(int c) |
682 | { |
683 | return ((c >= '0' && c <= '9') || |
684 | (c >= 'a' && c <= 'f') || |
685 | (c >= 'A' && c <= 'F')); |
686 | } |
687 | |
688 | bool Lexer::isOctalDigit(int c) |
689 | { |
690 | return (c >= '0' && c <= '7'); |
691 | } |
692 | |
693 | int Lexer::matchPunctuator(int c1, int c2, int c3, int c4) |
694 | { |
695 | if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { |
696 | shift(4); |
697 | return URSHIFTEQUAL; |
698 | } else if (c1 == '=' && c2 == '=' && c3 == '=') { |
699 | shift(3); |
700 | return STREQ; |
701 | } else if (c1 == '!' && c2 == '=' && c3 == '=') { |
702 | shift(3); |
703 | return STRNEQ; |
704 | } else if (c1 == '>' && c2 == '>' && c3 == '>') { |
705 | shift(3); |
706 | return URSHIFT; |
707 | } else if (c1 == '<' && c2 == '<' && c3 == '=') { |
708 | shift(3); |
709 | return LSHIFTEQUAL; |
710 | } else if (c1 == '>' && c2 == '>' && c3 == '=') { |
711 | shift(3); |
712 | return RSHIFTEQUAL; |
713 | } else if (c1 == '<' && c2 == '=') { |
714 | shift(2); |
715 | return LE; |
716 | } else if (c1 == '>' && c2 == '=') { |
717 | shift(2); |
718 | return GE; |
719 | } else if (c1 == '!' && c2 == '=') { |
720 | shift(2); |
721 | return NE; |
722 | } else if (c1 == '+' && c2 == '+') { |
723 | shift(2); |
724 | if (terminator) |
725 | return AUTOPLUSPLUS; |
726 | else |
727 | return PLUSPLUS; |
728 | } else if (c1 == '-' && c2 == '-') { |
729 | shift(2); |
730 | if (terminator) |
731 | return AUTOMINUSMINUS; |
732 | else |
733 | return MINUSMINUS; |
734 | } else if (c1 == '=' && c2 == '=') { |
735 | shift(2); |
736 | return EQEQ; |
737 | } else if (c1 == '+' && c2 == '=') { |
738 | shift(2); |
739 | return PLUSEQUAL; |
740 | } else if (c1 == '-' && c2 == '=') { |
741 | shift(2); |
742 | return MINUSEQUAL; |
743 | } else if (c1 == '*' && c2 == '=') { |
744 | shift(2); |
745 | return MULTEQUAL; |
746 | } else if (c1 == '/' && c2 == '=') { |
747 | shift(2); |
748 | return DIVEQUAL; |
749 | } else if (c1 == '&' && c2 == '=') { |
750 | shift(2); |
751 | return ANDEQUAL; |
752 | } else if (c1 == '^' && c2 == '=') { |
753 | shift(2); |
754 | return XOREQUAL; |
755 | } else if (c1 == '%' && c2 == '=') { |
756 | shift(2); |
757 | return MODEQUAL; |
758 | } else if (c1 == '|' && c2 == '=') { |
759 | shift(2); |
760 | return OREQUAL; |
761 | } else if (c1 == '<' && c2 == '<') { |
762 | shift(2); |
763 | return LSHIFT; |
764 | } else if (c1 == '>' && c2 == '>') { |
765 | shift(2); |
766 | return RSHIFT; |
767 | } else if (c1 == '&' && c2 == '&') { |
768 | shift(2); |
769 | return AND; |
770 | } else if (c1 == '|' && c2 == '|') { |
771 | shift(2); |
772 | return OR; |
773 | } |
774 | |
775 | switch(c1) { |
776 | case '=': |
777 | case '>': |
778 | case '<': |
779 | case ',': |
780 | case '!': |
781 | case '~': |
782 | case '?': |
783 | case ':': |
784 | case '.': |
785 | case '+': |
786 | case '-': |
787 | case '*': |
788 | case '/': |
789 | case '&': |
790 | case '|': |
791 | case '^': |
792 | case '%': |
793 | case '(': |
794 | case ')': |
795 | case '{': |
796 | case '}': |
797 | case '[': |
798 | case ']': |
799 | case ';': |
800 | shift(1); |
801 | return static_cast<int>(c1); |
802 | default: |
803 | return -1; |
804 | } |
805 | } |
806 | |
807 | unsigned short Lexer::singleEscape(unsigned short c) |
808 | { |
809 | switch(c) { |
810 | case 'b': |
811 | return 0x08; |
812 | case 't': |
813 | return 0x09; |
814 | case 'n': |
815 | return 0x0A; |
816 | case 'v': |
817 | return 0x0B; |
818 | case 'f': |
819 | return 0x0C; |
820 | case 'r': |
821 | return 0x0D; |
822 | case '"': |
823 | return 0x22; |
824 | case '\'': |
825 | return 0x27; |
826 | case '\\': |
827 | return 0x5C; |
828 | default: |
829 | return c; |
830 | } |
831 | } |
832 | |
833 | unsigned short Lexer::convertOctal(int c1, int c2, int c3) |
834 | { |
835 | return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); |
836 | } |
837 | |
838 | unsigned char Lexer::convertHex(int c) |
839 | { |
840 | if (c >= '0' && c <= '9') |
841 | return static_cast<unsigned char>(c - '0'); |
842 | if (c >= 'a' && c <= 'f') |
843 | return static_cast<unsigned char>(c - 'a' + 10); |
844 | return static_cast<unsigned char>(c - 'A' + 10); |
845 | } |
846 | |
847 | unsigned char Lexer::convertHex(int c1, int c2) |
848 | { |
849 | return ((convertHex(c1) << 4) + convertHex(c2)); |
850 | } |
851 | |
852 | KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) |
853 | { |
854 | return KJS::UChar((convertHex(c1) << 4) + convertHex(c2), |
855 | (convertHex(c3) << 4) + convertHex(c4)); |
856 | } |
857 | |
858 | void Lexer::record8(int c) |
859 | { |
860 | ASSERT(c >= 0); |
861 | ASSERT(c <= 0xff); |
862 | m_buffer8.append(c); |
863 | } |
864 | |
865 | void Lexer::record16(int c) |
866 | { |
867 | ASSERT(c >= 0); |
868 | ASSERT(c <= USHRT_MAX); |
869 | record16(UChar(static_cast<unsigned short>(c))); |
870 | } |
871 | |
872 | void Lexer::record16(KJS::UChar c) |
873 | { |
874 | m_buffer16.append(c); |
875 | } |
876 | |
877 | bool Lexer::scanRegExp() |
878 | { |
879 | m_buffer16.clear(); |
880 | bool lastWasEscape = false; |
881 | bool inBrackets = false; |
882 | |
883 | while (1) { |
884 | if (isLineTerminator() || current == -1) |
885 | return false; |
886 | else if (current != '/' || lastWasEscape == true || inBrackets == true) |
887 | { |
888 | // keep track of '[' and ']' |
889 | if (!lastWasEscape) { |
890 | if ( current == '[' && !inBrackets ) |
891 | inBrackets = true; |
892 | if ( current == ']' && inBrackets ) |
893 | inBrackets = false; |
894 | } |
895 | record16(current); |
896 | lastWasEscape = |
897 | !lastWasEscape && (current == '\\'); |
898 | } else { // end of regexp |
899 | m_pattern = UString(m_buffer16); |
900 | m_buffer16.clear(); |
901 | shift(1); |
902 | break; |
903 | } |
904 | shift(1); |
905 | } |
906 | |
907 | while (isIdentPart(current)) { |
908 | record16(current); |
909 | shift(1); |
910 | } |
911 | m_flags = UString(m_buffer16); |
912 | |
913 | return true; |
914 | } |
915 | |
916 | |
917 | void Lexer::clear() |
918 | { |
919 | deleteAllValues(m_strings); |
920 | Vector<UString*> newStrings; |
921 | newStrings.reserveCapacity(initialStringTableCapacity); |
922 | m_strings.swap(newStrings); |
923 | deleteAllValues(m_identifiers); |
924 | Vector<KJS::Identifier*> newIdentifiers; |
925 | newIdentifiers.reserveCapacity(initialStringTableCapacity); |
926 | m_identifiers.swap(newIdentifiers); |
927 | |
928 | Vector<char> newBuffer8; |
929 | newBuffer8.reserveCapacity(initialReadBufferCapacity); |
930 | m_buffer8.swap(newBuffer8); |
931 | |
932 | Vector<UChar> newBuffer16; |
933 | newBuffer16.reserveCapacity(initialReadBufferCapacity); |
934 | m_buffer16.swap(newBuffer16); |
935 | |
936 | m_pattern = 0; |
937 | m_flags = 0; |
938 | m_sourceURL = 0; |
939 | } |
940 | |
941 | Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer) |
942 | { |
943 | KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size()); |
944 | m_identifiers.append(identifier); |
945 | return identifier; |
946 | } |
947 | |
948 | UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer) |
949 | { |
950 | UString* string = new UString(buffer); |
951 | m_strings.append(string); |
952 | return string; |
953 | } |
954 | |
955 | } // namespace KJS |
956 | |