Warning: That file was not part of the compilation database. It may have many parsing errors.
1 | //===--- clang/Basic/CharInfo.h - Classifying ASCII Characters --*- C++ -*-===// |
---|---|
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef LLVM_CLANG_BASIC_CHARINFO_H |
11 | #define LLVM_CLANG_BASIC_CHARINFO_H |
12 | |
13 | #include "clang/Basic/LLVM.h" |
14 | #include "llvm/ADT/StringRef.h" |
15 | #include "llvm/Support/Compiler.h" |
16 | #include "llvm/Support/DataTypes.h" |
17 | |
18 | namespace clang { |
namespace charinfo {
  /// Per-character classification table, indexed by the unsigned byte value.
  /// Each entry is a bitwise OR of the CHAR_* flags declared below. The table
  /// itself is defined out of line.
  extern const uint16_t InfoTable[256];

  /// Primary classification flags. Each flag occupies its own bit so that
  /// several classes can be tested with a single mask.
  enum {
    CHAR_HORZ_WS  = 1 << 0,  // '\t', '\f', '\v'.  Note, no '\0'
    CHAR_VERT_WS  = 1 << 1,  // '\r', '\n'
    CHAR_SPACE    = 1 << 2,  // ' '
    CHAR_DIGIT    = 1 << 3,  // 0-9
    CHAR_XLETTER  = 1 << 4,  // a-f,A-F
    CHAR_UPPER    = 1 << 5,  // A-Z
    CHAR_LOWER    = 1 << 6,  // a-z
    CHAR_UNDER    = 1 << 7,  // _
    CHAR_PERIOD   = 1 << 8,  // .
    CHAR_RAWDEL   = 1 << 9,  // {}[]#<>%:;?*+-/^&|~!=,"'
    CHAR_PUNCT    = 1 << 10  // `$@()
  };

  /// Convenience combinations: a hex letter that is also an uppercase or a
  /// lowercase letter, respectively.
  enum {
    CHAR_XUPPER = CHAR_XLETTER | CHAR_UPPER,
    CHAR_XLOWER = CHAR_XLETTER | CHAR_LOWER
  };
} // end namespace charinfo
41 | |
42 | /// Returns true if this is an ASCII character. |
43 | LLVM_READNONE inline bool isASCII(char c) { |
44 | return static_cast<unsigned char>(c) <= 127; |
45 | } |
46 | |
47 | /// Returns true if this is a valid first character of a C identifier, |
48 | /// which is [a-zA-Z_]. |
49 | LLVM_READONLY inline bool isIdentifierHead(unsigned char c, |
50 | bool AllowDollar = false) { |
51 | using namespace charinfo; |
52 | if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER)) |
53 | return true; |
54 | return AllowDollar && c == '$'; |
55 | } |
56 | |
57 | /// Returns true if this is a body character of a C identifier, |
58 | /// which is [a-zA-Z0-9_]. |
59 | LLVM_READONLY inline bool isIdentifierBody(unsigned char c, |
60 | bool AllowDollar = false) { |
61 | using namespace charinfo; |
62 | if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER)) |
63 | return true; |
64 | return AllowDollar && c == '$'; |
65 | } |
66 | |
67 | /// Returns true if this character is horizontal ASCII whitespace: |
68 | /// ' ', '\\t', '\\f', '\\v'. |
69 | /// |
70 | /// Note that this returns false for '\\0'. |
71 | LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) { |
72 | using namespace charinfo; |
73 | return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; |
74 | } |
75 | |
76 | /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'. |
77 | /// |
78 | /// Note that this returns false for '\\0'. |
79 | LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) { |
80 | using namespace charinfo; |
81 | return (InfoTable[c] & CHAR_VERT_WS) != 0; |
82 | } |
83 | |
84 | /// Return true if this character is horizontal or vertical ASCII whitespace: |
85 | /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. |
86 | /// |
87 | /// Note that this returns false for '\\0'. |
88 | LLVM_READONLY inline bool isWhitespace(unsigned char c) { |
89 | using namespace charinfo; |
90 | return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0; |
91 | } |
92 | |
93 | /// Return true if this character is an ASCII digit: [0-9] |
94 | LLVM_READONLY inline bool isDigit(unsigned char c) { |
95 | using namespace charinfo; |
96 | return (InfoTable[c] & CHAR_DIGIT) != 0; |
97 | } |
98 | |
99 | /// Return true if this character is a lowercase ASCII letter: [a-z] |
100 | LLVM_READONLY inline bool isLowercase(unsigned char c) { |
101 | using namespace charinfo; |
102 | return (InfoTable[c] & CHAR_LOWER) != 0; |
103 | } |
104 | |
105 | /// Return true if this character is an uppercase ASCII letter: [A-Z] |
106 | LLVM_READONLY inline bool isUppercase(unsigned char c) { |
107 | using namespace charinfo; |
108 | return (InfoTable[c] & CHAR_UPPER) != 0; |
109 | } |
110 | |
111 | /// Return true if this character is an ASCII letter: [a-zA-Z] |
112 | LLVM_READONLY inline bool isLetter(unsigned char c) { |
113 | using namespace charinfo; |
114 | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0; |
115 | } |
116 | |
117 | /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9] |
118 | LLVM_READONLY inline bool isAlphanumeric(unsigned char c) { |
119 | using namespace charinfo; |
120 | return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0; |
121 | } |
122 | |
123 | /// Return true if this character is an ASCII hex digit: [0-9a-fA-F] |
124 | LLVM_READONLY inline bool isHexDigit(unsigned char c) { |
125 | using namespace charinfo; |
126 | return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0; |
127 | } |
128 | |
129 | /// Return true if this character is an ASCII punctuation character. |
130 | /// |
131 | /// Note that '_' is both a punctuation character and an identifier character! |
132 | LLVM_READONLY inline bool isPunctuation(unsigned char c) { |
133 | using namespace charinfo; |
134 | return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; |
135 | } |
136 | |
137 | /// Return true if this character is an ASCII printable character; that is, a |
138 | /// character that should take exactly one column to print in a fixed-width |
139 | /// terminal. |
140 | LLVM_READONLY inline bool isPrintable(unsigned char c) { |
141 | using namespace charinfo; |
142 | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| |
143 | CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; |
144 | } |
145 | |
146 | /// Return true if this is the body character of a C preprocessing number, |
147 | /// which is [a-zA-Z0-9_.]. |
148 | LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) { |
149 | using namespace charinfo; |
150 | return (InfoTable[c] & |
151 | (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0; |
152 | } |
153 | |
154 | /// Return true if this is the body character of a C++ raw string delimiter. |
155 | LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) { |
156 | using namespace charinfo; |
157 | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| |
158 | CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; |
159 | } |
160 | |
161 | |
162 | /// Converts the given ASCII character to its lowercase equivalent. |
163 | /// |
164 | /// If the character is not an uppercase character, it is returned as is. |
165 | LLVM_READONLY inline char toLowercase(char c) { |
166 | if (isUppercase(c)) |
167 | return c + 'a' - 'A'; |
168 | return c; |
169 | } |
170 | |
171 | /// Converts the given ASCII character to its uppercase equivalent. |
172 | /// |
173 | /// If the character is not a lowercase character, it is returned as is. |
174 | LLVM_READONLY inline char toUppercase(char c) { |
175 | if (isLowercase(c)) |
176 | return c + 'A' - 'a'; |
177 | return c; |
178 | } |
179 | |
180 | |
181 | /// Return true if this is a valid ASCII identifier. |
182 | /// |
183 | /// Note that this is a very simple check; it does not accept UCNs as valid |
184 | /// identifier characters. |
185 | LLVM_READONLY inline bool isValidIdentifier(StringRef S, |
186 | bool AllowDollar = false) { |
187 | if (S.empty() || !isIdentifierHead(S[0], AllowDollar)) |
188 | return false; |
189 | |
190 | for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I) |
191 | if (!isIdentifierBody(*I, AllowDollar)) |
192 | return false; |
193 | |
194 | return true; |
195 | } |
196 | |
197 | } // end namespace clang |
198 | |
199 | #endif |
200 |
Warning: That file was not part of the compilation database. It may have many parsing errors.