/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2017, assimp team

All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/

/** @file  FBXTokenizer.cpp
 *  @brief Implementation of the FBX broadphase lexer
 */
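
// The ASCII FBX format is a loose, line-oriented syntax of the general shape
//
//   ; a comment, extending to the end of the line
//   Key: value, value, "quoted value" {
//       NestedKey: value
//   }
//
// The broadphase lexer below reduces such input to a flat stream of KEY,
// DATA, OPEN_BRACKET, CLOSE_BRACKET and COMMA tokens; interpreting the
// contents of DATA tokens is left to the parser.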

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

// tab width for logging columns
#define ASSIMP_FBX_TAB_WIDTH 4

#include "ParsingUtils.h"

#include "FBXTokenizer.h"
#include "FBXUtil.h"
#include "Exceptional.h"

namespace Assimp {
namespace FBX {

// ------------------------------------------------------------------------------------------------
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int line, unsigned int column)
    :
#ifdef DEBUG
    contents(sbegin, static_cast<size_t>(send-sbegin)),
#endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(line)
    , column(column)
{
    ai_assert(sbegin);
    ai_assert(send);

    // tokens must be of non-zero length
    ai_assert(static_cast<size_t>(send-sbegin) > 0);
}


// ------------------------------------------------------------------------------------------------
Token::~Token()
{
}


namespace {

// ------------------------------------------------------------------------------------------------
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column) AI_WONT_RETURN_SUFFIX;
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int line, unsigned int column)
{
    throw DeadlyImportError(Util::AddLineAndColumn("FBX-Tokenize", message, line, column));
}


// ------------------------------------------------------------------------------------------------
// process a potential data token in the range [start,end], adding it to 'output_tokens'.
void ProcessDataToken( TokenList& output_tokens, const char*& start, const char*& end,
                       unsigned int line,
                       unsigned int column,
                       TokenType type = TokenType_DATA,
                       bool must_have_token = false)
{
    if (start && end) {
        // sanity check:
        // tokens should have no whitespace outside quoted text and [start,end] should
        // properly delimit the valid range.
        bool in_double_quotes = false;
        for (const char* c = start; c != end + 1; ++c) {
            if (*c == '\"') {
                in_double_quotes = !in_double_quotes;
            }

            if (!in_double_quotes && IsSpaceOrNewLine(*c)) {
                TokenizeError("unexpected whitespace in token", line, column);
            }
        }

        if (in_double_quotes) {
            TokenizeError("non-terminated double quotes", line, column);
        }

        output_tokens.push_back(new_Token(start, end + 1, type, line, column));
    }
    else if (must_have_token) {
        TokenizeError("unexpected character, expected data token", line, column);
    }

    // reset the caller's range pointers in preparation for the next token
    start = end = NULL;
}

}

// ------------------------------------------------------------------------------------------------
void Tokenize(TokenList& output_tokens, const char* input)
{
    ai_assert(input);

    // line and column numbers are one-based
    unsigned int line = 1;
    unsigned int column = 1;

    bool comment = false;
    bool in_double_quotes = false;
    bool pending_data_token = false;

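    // [token_begin, token_end] delimits the data token currently being
    // accumulated, if any; ProcessDataToken() resets both to NULL.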
    const char* token_begin = NULL, *token_end = NULL;
    for (const char* cur = input; *cur; column += (*cur == '\t' ? ASSIMP_FBX_TAB_WIDTH : 1), ++cur) {
        const char c = *cur;

        if (IsLineEnd(c)) {
            comment = false;

            column = 0;
            ++line;
        }

        if (comment) {
            continue;
        }

        if (in_double_quotes) {
            if (c == '\"') {
                in_double_quotes = false;
                token_end = cur;

                ProcessDataToken(output_tokens, token_begin, token_end, line, column);
                pending_data_token = false;
            }
            continue;
        }

        switch(c)
        {
        case '\"':
            if (token_begin) {
                TokenizeError("unexpected double-quote", line, column);
            }
            token_begin = cur;
            in_double_quotes = true;
            continue;

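        // a semicolon begins a comment which extends to the end of the line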
        case ';':
            ProcessDataToken(output_tokens, token_begin, token_end, line, column);
            comment = true;
            continue;

        case '{':
            ProcessDataToken(output_tokens, token_begin, token_end, line, column);
            output_tokens.push_back(new_Token(cur, cur + 1, TokenType_OPEN_BRACKET, line, column));
            continue;

        case '}':
            ProcessDataToken(output_tokens, token_begin, token_end, line, column);
            output_tokens.push_back(new_Token(cur, cur + 1, TokenType_CLOSE_BRACKET, line, column));
            continue;

        case ',':
            if (pending_data_token) {
                ProcessDataToken(output_tokens, token_begin, token_end, line, column, TokenType_DATA, true);
            }
            output_tokens.push_back(new_Token(cur, cur + 1, TokenType_COMMA, line, column));
            continue;

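        // a colon terminates the pending token, which then becomes a KEY token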
        case ':':
            if (pending_data_token) {
                ProcessDataToken(output_tokens, token_begin, token_end, line, column, TokenType_KEY, true);
            }
            else {
                TokenizeError("unexpected colon", line, column);
            }
            continue;
        }

        if (IsSpaceOrNewLine(c)) {

            if (token_begin) {
                // peek ahead: if the next non-whitespace character is a colon,
                // this counts as a KEY token rather than a DATA token.
                TokenType type = TokenType_DATA;
                for (const char* peek = cur; *peek; ++peek) {
                    if (*peek == ':') {
                        type = TokenType_KEY;
                        cur = peek;
                        break;
                    }
                    if (!IsSpaceOrNewLine(*peek)) {
                        break;
                    }
                }

                ProcessDataToken(output_tokens, token_begin, token_end, line, column, type);
            }

            pending_data_token = false;
        }
        else {
            token_end = cur;
            if (!token_begin) {
                token_begin = cur;
            }

            pending_data_token = true;
        }
    }
}
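
// A minimal usage sketch (hypothetical caller; buffer management is the
// caller's concern, but Tokenize() and DeadlyImportError are as used above):
//
//   TokenList tokens;
//   try {
//       Tokenize(tokens, buffer); // buffer: null-terminated ASCII FBX text
//   }
//   catch(const DeadlyImportError& err) {
//       // unrecoverable lexing error; err.what() carries the "FBX-Tokenize"
//       // prefix plus line and column information
//   }
//
// The Token objects are allocated via new_Token() and stored as pointers,
// so whoever owns the TokenList is responsible for releasing them.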

} // !FBX
} // !Assimp

#endif // !ASSIMP_BUILD_NO_FBX_IMPORTER