// -*- c-basic-offset: 2 -*-
/*
 * This file is part of the KDE libraries
 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 * Copyright (C) 2007 Apple Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

24#ifndef Lexer_h
25#define Lexer_h
26
27#include "ustring.h"
28#include <wtf/Vector.h>
29#include <wtf/Noncopyable.h>
30
31namespace KJS {
32
33 class Identifier;
34 class RegExp;
35
36 class Lexer : Noncopyable {
37 public:
38 void setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len);
39 int lex();
40
41 int lineNo() const { return yylineno; }
42 UString sourceURL() const { return m_sourceURL; }
43
44 bool prevTerminator() const { return terminator; }
45
46 enum State { Start,
47 IdentifierOrKeyword,
48 Identifier,
49 InIdentifierOrKeyword,
50 InIdentifier,
51 InIdentifierStartUnicodeEscapeStart,
52 InIdentifierStartUnicodeEscape,
53 InIdentifierPartUnicodeEscapeStart,
54 InIdentifierPartUnicodeEscape,
55 InSingleLineComment,
56 InMultiLineComment,
57 InNum,
58 InNum0,
59 InHex,
60 InOctal,
61 InDecimal,
62 InExponentIndicator,
63 InExponent,
64 Hex,
65 Octal,
66 Number,
67 String,
68 Eof,
69 InString,
70 InEscapeSequence,
71 InHexEscape,
72 InUnicodeEscape,
73 Other,
74 Bad };
75
76 bool scanRegExp();
77 const UString& pattern() const { return m_pattern; }
78 const UString& flags() const { return m_flags; }
79
80 static unsigned char convertHex(int);
81 static unsigned char convertHex(int c1, int c2);
82 static UChar convertUnicode(int c1, int c2, int c3, int c4);
83 static bool isIdentStart(int);
84 static bool isIdentPart(int);
85 static bool isHexDigit(int);
86
87 bool sawError() const { return error; }
88
89 void clear();
90
91 static void setIdentStartChecker(bool (*f)(int c));
92 static void setIdentPartChecker(bool (*f)(int c));
93
94 private:
95 friend Lexer& lexer();
96 Lexer();
97
98 int yylineno;
99 UString m_sourceURL;
100 bool done;
101 Vector<char> m_buffer8;
102 Vector<UChar> m_buffer16;
103 bool terminator;
104 bool restrKeyword;
105 // encountered delimiter like "'" and "}" on last run
106 bool delimited;
107 bool skipLF;
108 bool skipCR;
109 bool eatNextIdentifier;
110 int stackToken;
111 int lastToken;
112
113 State state;
114 void setDone(State s);
115 unsigned int pos;
116 void shift(unsigned int p);
117 void nextLine();
118 int lookupKeyword(const char *);
119
120 bool isWhiteSpace() const;
121 bool isLineTerminator();
122 static bool isOctalDigit(int c);
123
124 int matchPunctuator(int c1, int c2, int c3, int c4);
125 static unsigned short singleEscape(unsigned short c);
126 static unsigned short convertOctal(int c1, int c2, int c3);
127
128 void record8(int c);
129 void record16(int c);
130 void record16(UChar c);
131
132 KJS::Identifier* makeIdentifier(const Vector<UChar>& buffer);
133 UString* makeUString(const Vector<UChar>& buffer);
134
135 const UChar *code;
136 unsigned int length;
137 int yycolumn;
138#ifndef KJS_PURE_ECMA
139 int bol; // begin of line
140#endif
141 bool error;
142
143 // current and following unicode characters (int to allow for -1 for end-of-file marker)
144 int current, next1, next2, next3;
145
146 Vector<UString*> m_strings;
147 Vector<KJS::Identifier*> m_identifiers;
148
149 UString m_pattern;
150 UString m_flags;
151 };
152
153 Lexer& lexer(); // Returns the singletone JavaScript lexer.
154
155} // namespace KJS
156
157#endif // Lexer_h
158