1// -*- c-basic-offset: 2 -*-
2/*
3 * This file is part of the KDE libraries
4 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
5 * Copyright (C) 2006 Apple Computer, Inc.
6 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25#include "lexer.h"
26#include <config-kjs.h>
27#include <string.h>
28#include <limits.h>
29
30#include "dtoa.h"
31#include "function.h"
32#include "interpreter.h"
33#include "nodes.h"
34#include "commonunicode.h"
35#include "wtf/ASCIICType.h"
36#include "wtf/DisallowCType.h"
37#include <wtf/unicode/libc/UnicodeLibC.h>
38
39using namespace WTF;
40using namespace Unicode;
41
42// GCC cstring uses these automatically, but not all implementations do.
43using std::strlen;
44using std::strcpy;
45using std::strncpy;
46using std::memset;
47using std::memcpy;
48
49// we can't specify the namespace in yacc's C output, so do it here
50using namespace KJS;
51
52#ifndef KDE_USE_FINAL
53#include "grammar.h"
54#endif
55
56#include "lookup.h"
57#include "lexer.lut.h"
58
59extern YYLTYPE kjsyylloc; // global bison variable holding token info
60
61// a bridge for yacc from the C world to C++
62int kjsyylex()
63{
64 return lexer().lex();
65}
66
67namespace KJS {
68
// Forward declaration: lex() calls this helper before its definition, which
// appears further down in this file.
static bool isDecimalDigit(int c);

// Capacity reserved up front for the token text buffers (m_buffer8 / m_buffer16).
static const size_t initialReadBufferCapacity = 32;
// Capacity reserved up front for the tables that own the strings and
// identifiers created while lexing (m_strings / m_identifiers).
static const size_t initialStringTableCapacity = 64;
73
74Lexer& lexer()
75{
76 // ASSERT(JSLock::currentThreadIsHoldingLock());
77
78 // FIXME: We'd like to avoid calling new here, but we don't currently
79 // support tearing down the Lexer at app quit time, since that would involve
80 // tearing down its UString data members without holding the JSLock.
81 static Lexer* staticLexer = new Lexer;
82 return *staticLexer;
83}
84
85Lexer::Lexer()
86 : yylineno(0)
87 , restrKeyword(false)
88 , eatNextIdentifier(false)
89 , stackToken(-1)
90 , lastToken(-1)
91 , pos(0)
92 , code(0)
93 , length(0)
94#ifndef KJS_PURE_ECMA
95 , bol(true)
96#endif
97 , current(0)
98 , next1(0)
99 , next2(0)
100 , next3(0)
101{
102 m_buffer8.reserveCapacity(initialReadBufferCapacity);
103 m_buffer16.reserveCapacity(initialReadBufferCapacity);
104 m_strings.reserveCapacity(initialStringTableCapacity);
105 m_identifiers.reserveCapacity(initialStringTableCapacity);
106}
107
108void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len)
109{
110 yylineno = startingLineNumber;
111 m_sourceURL = sourceURL;
112 restrKeyword = false;
113 delimited = false;
114 eatNextIdentifier = false;
115 stackToken = -1;
116 lastToken = -1;
117 pos = 0;
118 code = c;
119 length = len;
120 skipLF = false;
121 skipCR = false;
122 error = false;
123#ifndef KJS_PURE_ECMA
124 bol = true;
125#endif
126
127 // read first characters
128 current = (length > 0) ? code[0].uc : -1;
129 next1 = (length > 1) ? code[1].uc : -1;
130 next2 = (length > 2) ? code[2].uc : -1;
131 next3 = (length > 3) ? code[3].uc : -1;
132}
133
134void Lexer::shift(unsigned int p)
135{
136 // Here would be a good place to strip Cf characters, but that has caused compatibility problems:
137 // <http://bugs.webkit.org/show_bug.cgi?id=10183>.
138 while (p--) {
139 current = next1;
140 next1 = next2;
141 next2 = next3;
142 pos++;
143 next3 = (pos + 3 < length) ? code[pos + 3].uc : -1;
144 }
145}
146
147// called on each new line
148void Lexer::nextLine()
149{
150 yylineno++;
151#ifndef KJS_PURE_ECMA
152 bol = true;
153#endif
154}
155
156void Lexer::setDone(State s)
157{
158 state = s;
159 done = true;
160}
161
162int Lexer::lex()
163{
164 int token = 0;
165 state = Start;
166 unsigned short stringType = 0; // either single or double quotes
167 m_buffer8.clear();
168 m_buffer16.clear();
169 done = false;
170 terminator = false;
171 skipLF = false;
172 skipCR = false;
173
174 // did we push a token on the stack previously ?
175 // (after an automatic semicolon insertion)
176 if (stackToken >= 0) {
177 setDone(Other);
178 token = stackToken;
179 stackToken = 0;
180 }
181
182 while (!done) {
183 if (skipLF && current != '\n') // found \r but not \n afterwards
184 skipLF = false;
185 if (skipCR && current != '\r') // found \n but not \r afterwards
186 skipCR = false;
187 if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
188 {
189 skipLF = false;
190 skipCR = false;
191 shift(1);
192 }
193 switch (state) {
194 case Start:
195 if (isWhiteSpace()) {
196 // do nothing
197 } else if (current == '/' && next1 == '/') {
198 shift(1);
199 state = InSingleLineComment;
200 } else if (current == '/' && next1 == '*') {
201 shift(1);
202 state = InMultiLineComment;
203 } else if (current == -1) {
204 if (!terminator && !delimited) {
205 // automatic semicolon insertion if program incomplete
206 token = ';';
207 stackToken = 0;
208 setDone(Other);
209 } else
210 setDone(Eof);
211 } else if (isLineTerminator()) {
212 nextLine();
213 terminator = true;
214 if (restrKeyword) {
215 token = ';';
216 setDone(Other);
217 }
218 } else if (current == '"' || current == '\'') {
219 state = InString;
220 stringType = static_cast<unsigned short>(current);
221 } else if (isIdentStart(current)) {
222 record16(current);
223 state = InIdentifierOrKeyword;
224 } else if (current == '\\') {
225 state = InIdentifierStartUnicodeEscapeStart;
226 } else if (current == '0') {
227 record8(current);
228 state = InNum0;
229 } else if (isDecimalDigit(current)) {
230 record8(current);
231 state = InNum;
232 } else if (current == '.' && isDecimalDigit(next1)) {
233 record8(current);
234 state = InDecimal;
235#ifndef KJS_PURE_ECMA
236 // <!-- marks the beginning of a line comment (for www usage)
237 } else if (current == '<' && next1 == '!' &&
238 next2 == '-' && next3 == '-') {
239 shift(3);
240 state = InSingleLineComment;
241 // same for -->
242 } else if (bol && current == '-' && next1 == '-' && next2 == '>') {
243 shift(2);
244 state = InSingleLineComment;
245#endif
246 } else {
247 token = matchPunctuator(current, next1, next2, next3);
248 if (token != -1) {
249 setDone(Other);
250 } else {
251 // cerr << "encountered unknown character" << endl;
252 setDone(Bad);
253 }
254 }
255 break;
256 case InString:
257 switch (current) {
258 case '\'':
259 case '"':
260 if (current == stringType) {
261 shift(1);
262 setDone(String);
263 } else {
264 record16(current);
265 }
266 break;
267 case '\\':
268 state = InEscapeSequence;
269 break;
270 case '\n':
271 case '\r':
272 case 0x2028:
273 case 0x2029:
274 case -1:
275 // encountered newline or eof
276 setDone(Bad);
277 break;
278 default:
279 record16(current);
280 break;
281 }
282 break;
283 // Escape Sequences inside of strings
284 case InEscapeSequence:
285 if (isOctalDigit(current)) {
286 if (current >= '0' && current <= '3' &&
287 isOctalDigit(next1) && isOctalDigit(next2)) {
288 record16(convertOctal(current, next1, next2));
289 shift(2);
290 state = InString;
291 } else if (isOctalDigit(current) && isOctalDigit(next1)) {
292 record16(convertOctal('0', current, next1));
293 shift(1);
294 state = InString;
295 } else if (isOctalDigit(current)) {
296 record16(convertOctal('0', '0', current));
297 state = InString;
298 } else {
299 setDone(Bad);
300 }
301 } else if (current == 'x')
302 state = InHexEscape;
303 else if (current == 'u')
304 state = InUnicodeEscape;
305 else if (isLineTerminator()) {
306 nextLine();
307 state = InString;
308 } else {
309 record16(singleEscape(static_cast<unsigned short>(current)));
310 state = InString;
311 }
312 break;
313 case InHexEscape:
314 if (isHexDigit(current) && isHexDigit(next1)) {
315 state = InString;
316 record16(convertHex(current, next1));
317 shift(1);
318 } else {
319 setDone(Bad);
320 }
321 break;
322 case InUnicodeEscape:
323 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) {
324 record16(convertUnicode(current, next1, next2, next3));
325 shift(3);
326 state = InString;
327 } else if (current == stringType) {
328 record16('u');
329 shift(1);
330 setDone(String);
331 } else {
332 setDone(Bad);
333 }
334 break;
335 case InSingleLineComment:
336 if (isLineTerminator()) {
337 nextLine();
338 terminator = true;
339 if (restrKeyword) {
340 token = ';';
341 setDone(Other);
342 } else
343 state = Start;
344 } else if (current == -1) {
345 setDone(Eof);
346 }
347 break;
348 case InMultiLineComment:
349 if (current == -1) {
350 setDone(Bad);
351 } else if (isLineTerminator()) {
352 nextLine();
353 } else if (current == '*' && next1 == '/') {
354 state = Start;
355 shift(1);
356 }
357 break;
358 case InIdentifierOrKeyword:
359 case InIdentifier:
360 if (isIdentPart(current))
361 record16(current);
362 else if (current == '\\')
363 state = InIdentifierPartUnicodeEscapeStart;
364 else
365 setDone(state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
366 break;
367 case InNum0:
368 if (current == 'x' || current == 'X') {
369 record8(current);
370 state = InHex;
371 } else if (current == '.') {
372 record8(current);
373 state = InDecimal;
374 } else if (current == 'e' || current == 'E') {
375 record8(current);
376 state = InExponentIndicator;
377 } else if (isOctalDigit(current)) {
378 record8(current);
379 state = InOctal;
380 } else if (isDecimalDigit(current)) {
381 record8(current);
382 state = InDecimal;
383 } else {
384 setDone(Number);
385 }
386 break;
387 case InHex:
388 if (isHexDigit(current)) {
389 record8(current);
390 } else {
391 setDone(Hex);
392 }
393 break;
394 case InOctal:
395 if (isOctalDigit(current)) {
396 record8(current);
397 }
398 else if (isDecimalDigit(current)) {
399 record8(current);
400 state = InDecimal;
401 } else
402 setDone(Octal);
403 break;
404 case InNum:
405 if (isDecimalDigit(current)) {
406 record8(current);
407 } else if (current == '.') {
408 record8(current);
409 state = InDecimal;
410 } else if (current == 'e' || current == 'E') {
411 record8(current);
412 state = InExponentIndicator;
413 } else
414 setDone(Number);
415 break;
416 case InDecimal:
417 if (isDecimalDigit(current)) {
418 record8(current);
419 } else if (current == 'e' || current == 'E') {
420 record8(current);
421 state = InExponentIndicator;
422 } else
423 setDone(Number);
424 break;
425 case InExponentIndicator:
426 if (current == '+' || current == '-') {
427 record8(current);
428 } else if (isDecimalDigit(current)) {
429 record8(current);
430 state = InExponent;
431 } else
432 setDone(Bad);
433 break;
434 case InExponent:
435 if (isDecimalDigit(current)) {
436 record8(current);
437 } else
438 setDone(Number);
439 break;
440 case InIdentifierStartUnicodeEscapeStart:
441 if (current == 'u')
442 state = InIdentifierStartUnicodeEscape;
443 else
444 setDone(Bad);
445 break;
446 case InIdentifierPartUnicodeEscapeStart:
447 if (current == 'u')
448 state = InIdentifierPartUnicodeEscape;
449 else
450 setDone(Bad);
451 break;
452 case InIdentifierStartUnicodeEscape:
453 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
454 setDone(Bad);
455 break;
456 }
457 token = convertUnicode(current, next1, next2, next3).uc;
458 shift(3);
459 if (!isIdentStart(token)) {
460 setDone(Bad);
461 break;
462 }
463 record16(token);
464 state = InIdentifier;
465 break;
466 case InIdentifierPartUnicodeEscape:
467 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) {
468 setDone(Bad);
469 break;
470 }
471 token = convertUnicode(current, next1, next2, next3).uc;
472 shift(3);
473 if (!isIdentPart(token)) {
474 setDone(Bad);
475 break;
476 }
477 record16(token);
478 state = InIdentifier;
479 break;
480 default:
481 assert(!"Unhandled state in switch statement");
482 }
483
484 // move on to the next character
485 if (!done)
486 shift(1);
487#ifndef KJS_PURE_ECMA
488 if (state != Start && state != InMultiLineComment)
489 bol = false;
490#endif
491 }
492
493 // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
494 if ((state == Number || state == Octal || state == Hex) && isIdentStart(current))
495 state = Bad;
496
497 // terminate string
498 m_buffer8.append('\0');
499
500#ifdef KJS_DEBUG_LEX
501 fprintf(stderr, "line: %d ", lineNo());
502 fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
503 fprintf(stderr, "%s ", m_buffer8.data());
504#endif
505
506 double dval = 0;
507 if (state == Number) {
508 dval = kjs_strtod(m_buffer8.data(), 0L);
509 } else if (state == Hex) { // scan hex numbers
510 const char *p = m_buffer8.data() + 2;
511 while (char c = *p++) {
512 dval *= 16;
513 dval += convertHex(c);
514 }
515
516 if (dval >= mantissaOverflowLowerBound)
517 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
518
519 state = Number;
520 } else if (state == Octal) { // scan octal number
521 const char *p = m_buffer8.data() + 1;
522 while (char c = *p++) {
523 dval *= 8;
524 dval += c - '0';
525 }
526
527 if (dval >= mantissaOverflowLowerBound)
528 dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
529
530 state = Number;
531 }
532
533#ifdef KJS_DEBUG_LEX
534 switch (state) {
535 case Eof:
536 printf("(EOF)\n");
537 break;
538 case Other:
539 printf("(Other)\n");
540 break;
541 case Identifier:
542 printf("(Identifier)/(Keyword)\n");
543 break;
544 case String:
545 printf("(String)\n");
546 break;
547 case Number:
548 printf("(Number)\n");
549 break;
550 default:
551 printf("(unknown)");
552 }
553#endif
554
555 if (state != Identifier && eatNextIdentifier)
556 eatNextIdentifier = false;
557
558 restrKeyword = false;
559 delimited = false;
560 kjsyylloc.first_line = yylineno; // ???
561 kjsyylloc.last_line = yylineno;
562
563 switch (state) {
564 case Eof:
565 token = 0;
566 break;
567 case Other:
568 if(token == '}' || token == ';') {
569 delimited = true;
570 }
571 break;
572 case IdentifierOrKeyword:
573 if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) {
574 case Identifier:
575 // Lookup for keyword failed, means this is an identifier
576 // Apply anonymous-function hack below (eat the identifier)
577 if (eatNextIdentifier) {
578 eatNextIdentifier = false;
579 token = lex();
580 break;
581 }
582 kjsyylval.ident = makeIdentifier(m_buffer16);
583 token = IDENT;
584 break;
585 }
586
587 eatNextIdentifier = false;
588 // Hack for "f = function somename() { ... }", too hard to get into the grammar
589 if (token == FUNCTION && lastToken == '=' )
590 eatNextIdentifier = true;
591
592 if (token == CONTINUE || token == BREAK ||
593 token == RETURN || token == THROW)
594 restrKeyword = true;
595 break;
596 case String:
597 kjsyylval.ustr = makeUString(m_buffer16);
598 token = STRING;
599 break;
600 case Number:
601 kjsyylval.dval = dval;
602 token = NUMBER;
603 break;
604 case Bad:
605#ifdef KJS_DEBUG_LEX
606 fprintf(stderr, "KJS: yylex: ERROR.\n");
607#endif
608 error = true;
609 return -1;
610 default:
611 assert(!"unhandled numeration value in switch");
612 error = true;
613 return -1;
614 }
615 lastToken = token;
616 return token;
617}
618
619bool Lexer::isWhiteSpace() const
620{
621 return CommonUnicode::isWhiteSpace(current);
622}
623
624bool Lexer::isLineTerminator()
625{
626 bool cr = (current == '\r');
627 bool lf = (current == '\n');
628 if (cr)
629 skipLF = true;
630 else if (lf)
631 skipCR = true;
632 return cr || lf || current == 0x2028 || current == 0x2029;
633}
634
// Signature of a predicate that classifies a single character code.
typedef bool CharacterCheck(int);
636
637static bool isIdentStartLibC(int c)
638{
639 return (category(c) & (Letter_Uppercase | Letter_Lowercase |
640 Letter_Titlecase | Letter_Modifier | Letter_Other))
641 || c == '$' || c == '_';
642}
643
644static bool isIdentPartLibC(int c)
645{
646 return (category(c) & (Letter_Uppercase | Letter_Lowercase |
647 Letter_Titlecase | Letter_Modifier | Letter_Other |
648 Mark_NonSpacing | Mark_SpacingCombining |
649 Number_DecimalDigit | Punctuation_Connector))
650 || c == '$' || c == '_';
651}
652
653static CharacterCheck *identStart = ::isIdentStartLibC;
654static CharacterCheck *identPart = ::isIdentPartLibC;
655
656void Lexer::setIdentStartChecker(bool (*f)(int c))
657{
658 identStart = f;
659}
660
661void Lexer::setIdentPartChecker(bool (*f)(int c))
662{
663 identPart = f;
664}
665
666bool Lexer::isIdentStart(int c)
667{
668 return (*identStart)(c);
669}
670
671bool Lexer::isIdentPart(int c)
672{
673 return (*identPart)(c);
674}
675
// True for the ASCII digits '0' through '9'.
static bool isDecimalDigit(int c)
{
  if (c < '0')
    return false;
  return c <= '9';
}
680
681bool Lexer::isHexDigit(int c)
682{
683 return ((c >= '0' && c <= '9') ||
684 (c >= 'a' && c <= 'f') ||
685 (c >= 'A' && c <= 'F'));
686}
687
688bool Lexer::isOctalDigit(int c)
689{
690 return (c >= '0' && c <= '7');
691}
692
693int Lexer::matchPunctuator(int c1, int c2, int c3, int c4)
694{
695 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
696 shift(4);
697 return URSHIFTEQUAL;
698 } else if (c1 == '=' && c2 == '=' && c3 == '=') {
699 shift(3);
700 return STREQ;
701 } else if (c1 == '!' && c2 == '=' && c3 == '=') {
702 shift(3);
703 return STRNEQ;
704 } else if (c1 == '>' && c2 == '>' && c3 == '>') {
705 shift(3);
706 return URSHIFT;
707 } else if (c1 == '<' && c2 == '<' && c3 == '=') {
708 shift(3);
709 return LSHIFTEQUAL;
710 } else if (c1 == '>' && c2 == '>' && c3 == '=') {
711 shift(3);
712 return RSHIFTEQUAL;
713 } else if (c1 == '<' && c2 == '=') {
714 shift(2);
715 return LE;
716 } else if (c1 == '>' && c2 == '=') {
717 shift(2);
718 return GE;
719 } else if (c1 == '!' && c2 == '=') {
720 shift(2);
721 return NE;
722 } else if (c1 == '+' && c2 == '+') {
723 shift(2);
724 if (terminator)
725 return AUTOPLUSPLUS;
726 else
727 return PLUSPLUS;
728 } else if (c1 == '-' && c2 == '-') {
729 shift(2);
730 if (terminator)
731 return AUTOMINUSMINUS;
732 else
733 return MINUSMINUS;
734 } else if (c1 == '=' && c2 == '=') {
735 shift(2);
736 return EQEQ;
737 } else if (c1 == '+' && c2 == '=') {
738 shift(2);
739 return PLUSEQUAL;
740 } else if (c1 == '-' && c2 == '=') {
741 shift(2);
742 return MINUSEQUAL;
743 } else if (c1 == '*' && c2 == '=') {
744 shift(2);
745 return MULTEQUAL;
746 } else if (c1 == '/' && c2 == '=') {
747 shift(2);
748 return DIVEQUAL;
749 } else if (c1 == '&' && c2 == '=') {
750 shift(2);
751 return ANDEQUAL;
752 } else if (c1 == '^' && c2 == '=') {
753 shift(2);
754 return XOREQUAL;
755 } else if (c1 == '%' && c2 == '=') {
756 shift(2);
757 return MODEQUAL;
758 } else if (c1 == '|' && c2 == '=') {
759 shift(2);
760 return OREQUAL;
761 } else if (c1 == '<' && c2 == '<') {
762 shift(2);
763 return LSHIFT;
764 } else if (c1 == '>' && c2 == '>') {
765 shift(2);
766 return RSHIFT;
767 } else if (c1 == '&' && c2 == '&') {
768 shift(2);
769 return AND;
770 } else if (c1 == '|' && c2 == '|') {
771 shift(2);
772 return OR;
773 }
774
775 switch(c1) {
776 case '=':
777 case '>':
778 case '<':
779 case ',':
780 case '!':
781 case '~':
782 case '?':
783 case ':':
784 case '.':
785 case '+':
786 case '-':
787 case '*':
788 case '/':
789 case '&':
790 case '|':
791 case '^':
792 case '%':
793 case '(':
794 case ')':
795 case '{':
796 case '}':
797 case '[':
798 case ']':
799 case ';':
800 shift(1);
801 return static_cast<int>(c1);
802 default:
803 return -1;
804 }
805}
806
807unsigned short Lexer::singleEscape(unsigned short c)
808{
809 switch(c) {
810 case 'b':
811 return 0x08;
812 case 't':
813 return 0x09;
814 case 'n':
815 return 0x0A;
816 case 'v':
817 return 0x0B;
818 case 'f':
819 return 0x0C;
820 case 'r':
821 return 0x0D;
822 case '"':
823 return 0x22;
824 case '\'':
825 return 0x27;
826 case '\\':
827 return 0x5C;
828 default:
829 return c;
830 }
831}
832
833unsigned short Lexer::convertOctal(int c1, int c2, int c3)
834{
835 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
836}
837
838unsigned char Lexer::convertHex(int c)
839{
840 if (c >= '0' && c <= '9')
841 return static_cast<unsigned char>(c - '0');
842 if (c >= 'a' && c <= 'f')
843 return static_cast<unsigned char>(c - 'a' + 10);
844 return static_cast<unsigned char>(c - 'A' + 10);
845}
846
847unsigned char Lexer::convertHex(int c1, int c2)
848{
849 return ((convertHex(c1) << 4) + convertHex(c2));
850}
851
852KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
853{
854 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2),
855 (convertHex(c3) << 4) + convertHex(c4));
856}
857
858void Lexer::record8(int c)
859{
860 ASSERT(c >= 0);
861 ASSERT(c <= 0xff);
862 m_buffer8.append(c);
863}
864
865void Lexer::record16(int c)
866{
867 ASSERT(c >= 0);
868 ASSERT(c <= USHRT_MAX);
869 record16(UChar(static_cast<unsigned short>(c)));
870}
871
872void Lexer::record16(KJS::UChar c)
873{
874 m_buffer16.append(c);
875}
876
877bool Lexer::scanRegExp()
878{
879 m_buffer16.clear();
880 bool lastWasEscape = false;
881 bool inBrackets = false;
882
883 while (1) {
884 if (isLineTerminator() || current == -1)
885 return false;
886 else if (current != '/' || lastWasEscape == true || inBrackets == true)
887 {
888 // keep track of '[' and ']'
889 if (!lastWasEscape) {
890 if ( current == '[' && !inBrackets )
891 inBrackets = true;
892 if ( current == ']' && inBrackets )
893 inBrackets = false;
894 }
895 record16(current);
896 lastWasEscape =
897 !lastWasEscape && (current == '\\');
898 } else { // end of regexp
899 m_pattern = UString(m_buffer16);
900 m_buffer16.clear();
901 shift(1);
902 break;
903 }
904 shift(1);
905 }
906
907 while (isIdentPart(current)) {
908 record16(current);
909 shift(1);
910 }
911 m_flags = UString(m_buffer16);
912
913 return true;
914}
915
916
917void Lexer::clear()
918{
919 deleteAllValues(m_strings);
920 Vector<UString*> newStrings;
921 newStrings.reserveCapacity(initialStringTableCapacity);
922 m_strings.swap(newStrings);
923 deleteAllValues(m_identifiers);
924 Vector<KJS::Identifier*> newIdentifiers;
925 newIdentifiers.reserveCapacity(initialStringTableCapacity);
926 m_identifiers.swap(newIdentifiers);
927
928 Vector<char> newBuffer8;
929 newBuffer8.reserveCapacity(initialReadBufferCapacity);
930 m_buffer8.swap(newBuffer8);
931
932 Vector<UChar> newBuffer16;
933 newBuffer16.reserveCapacity(initialReadBufferCapacity);
934 m_buffer16.swap(newBuffer16);
935
936 m_pattern = 0;
937 m_flags = 0;
938 m_sourceURL = 0;
939}
940
941Identifier* Lexer::makeIdentifier(const Vector<KJS::UChar>& buffer)
942{
943 KJS::Identifier* identifier = new KJS::Identifier(buffer.data(), buffer.size());
944 m_identifiers.append(identifier);
945 return identifier;
946}
947
948UString* Lexer::makeUString(const Vector<KJS::UChar>& buffer)
949{
950 UString* string = new UString(buffer);
951 m_strings.append(string);
952 return string;
953}
954
955} // namespace KJS
956