1 | // -*- c-basic-offset: 2 -*- |
2 | /* |
3 | * This file is part of the KDE libraries |
4 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
5 | * Copyright (C) 2007 Apple Inc. |
6 | * |
7 | * This library is free software; you can redistribute it and/or |
8 | * modify it under the terms of the GNU Library General Public |
9 | * License as published by the Free Software Foundation; either |
10 | * version 2 of the License, or (at your option) any later version. |
11 | * |
12 | * This library is distributed in the hope that it will be useful, |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | * Library General Public License for more details. |
16 | * |
17 | * You should have received a copy of the GNU Library General Public License |
18 | * along with this library; see the file COPYING.LIB. If not, write to |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
20 | * Boston, MA 02110-1301, USA. |
21 | * |
22 | */ |
23 | |
24 | #ifndef Lexer_h |
25 | #define Lexer_h |
26 | |
27 | #include "ustring.h" |
28 | #include <wtf/Vector.h> |
29 | #include <wtf/Noncopyable.h> |
30 | |
31 | namespace KJS { |
32 | |
33 | class Identifier; |
34 | class RegExp; |
35 | |
36 | class Lexer : Noncopyable { |
37 | public: |
38 | void setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len); |
39 | int lex(); |
40 | |
41 | int lineNo() const { return yylineno; } |
42 | UString sourceURL() const { return m_sourceURL; } |
43 | |
44 | bool prevTerminator() const { return terminator; } |
45 | |
46 | enum State { Start, |
47 | IdentifierOrKeyword, |
48 | Identifier, |
49 | InIdentifierOrKeyword, |
50 | InIdentifier, |
51 | InIdentifierStartUnicodeEscapeStart, |
52 | InIdentifierStartUnicodeEscape, |
53 | InIdentifierPartUnicodeEscapeStart, |
54 | InIdentifierPartUnicodeEscape, |
55 | , |
56 | , |
57 | InNum, |
58 | InNum0, |
59 | InHex, |
60 | InOctal, |
61 | InDecimal, |
62 | InExponentIndicator, |
63 | InExponent, |
64 | Hex, |
65 | Octal, |
66 | Number, |
67 | String, |
68 | Eof, |
69 | InString, |
70 | InEscapeSequence, |
71 | InHexEscape, |
72 | InUnicodeEscape, |
73 | Other, |
74 | Bad }; |
75 | |
76 | bool scanRegExp(); |
77 | const UString& pattern() const { return m_pattern; } |
78 | const UString& flags() const { return m_flags; } |
79 | |
80 | static unsigned char convertHex(int); |
81 | static unsigned char convertHex(int c1, int c2); |
82 | static UChar convertUnicode(int c1, int c2, int c3, int c4); |
83 | static bool isIdentStart(int); |
84 | static bool isIdentPart(int); |
85 | static bool isHexDigit(int); |
86 | |
87 | bool sawError() const { return error; } |
88 | |
89 | void clear(); |
90 | |
91 | static void setIdentStartChecker(bool (*f)(int c)); |
92 | static void setIdentPartChecker(bool (*f)(int c)); |
93 | |
94 | private: |
95 | friend Lexer& lexer(); |
96 | Lexer(); |
97 | |
98 | int yylineno; |
99 | UString m_sourceURL; |
100 | bool done; |
101 | Vector<char> m_buffer8; |
102 | Vector<UChar> m_buffer16; |
103 | bool terminator; |
104 | bool restrKeyword; |
105 | // encountered delimiter like "'" and "}" on last run |
106 | bool delimited; |
107 | bool skipLF; |
108 | bool skipCR; |
109 | bool eatNextIdentifier; |
110 | int stackToken; |
111 | int lastToken; |
112 | |
113 | State state; |
114 | void setDone(State s); |
115 | unsigned int pos; |
116 | void shift(unsigned int p); |
117 | void nextLine(); |
118 | int lookupKeyword(const char *); |
119 | |
120 | bool isWhiteSpace() const; |
121 | bool isLineTerminator(); |
122 | static bool isOctalDigit(int c); |
123 | |
124 | int matchPunctuator(int c1, int c2, int c3, int c4); |
125 | static unsigned short singleEscape(unsigned short c); |
126 | static unsigned short convertOctal(int c1, int c2, int c3); |
127 | |
128 | void record8(int c); |
129 | void record16(int c); |
130 | void record16(UChar c); |
131 | |
132 | KJS::Identifier* makeIdentifier(const Vector<UChar>& buffer); |
133 | UString* makeUString(const Vector<UChar>& buffer); |
134 | |
135 | const UChar *code; |
136 | unsigned int length; |
137 | int yycolumn; |
138 | #ifndef KJS_PURE_ECMA |
139 | int bol; // begin of line |
140 | #endif |
141 | bool error; |
142 | |
143 | // current and following unicode characters (int to allow for -1 for end-of-file marker) |
144 | int current, next1, next2, next3; |
145 | |
146 | Vector<UString*> m_strings; |
147 | Vector<KJS::Identifier*> m_identifiers; |
148 | |
149 | UString m_pattern; |
150 | UString m_flags; |
151 | }; |
152 | |
153 | Lexer& lexer(); // Returns the singleton JavaScript lexer. |
154 | |
155 | } // namespace KJS |
156 | |
157 | #endif // Lexer_h |
158 | |