/*
Open Asset Import Library (assimp)
----------------------------------------------------------------------

Copyright (c) 2006-2017, assimp team

All rights reserved.

Redistribution and use of this software in source and binary forms,
with or without modification, are permitted provided that the
following conditions are met:

* Redistributions of source code must retain the above
  copyright notice, this list of conditions and the
  following disclaimer.

* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the
  following disclaimer in the documentation and/or other
  materials provided with the distribution.

* Neither the name of the assimp team, nor the names of its
  contributors may be used to endorse or promote products
  derived from this software without specific prior
  written permission of the assimp team.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------
*/
/** @file  FBXBinaryTokenizer.cpp
 *  @brief Implementation of a fake lexer for binary fbx files -
 *  we emit tokens so the parser needs almost no special handling
 *  for binary files.
 */

#ifndef ASSIMP_BUILD_NO_FBX_IMPORTER

#include "FBXTokenizer.h"
#include "FBXUtil.h"
#include <assimp/defs.h>
#include <stdint.h>
#include <string.h> // memcpy, strncmp
#include "Exceptional.h"
#include "ByteSwapper.h"

namespace Assimp {
namespace FBX {

//enum Flag
//{
//    e_unknown_0 = 1 << 0,
//    e_unknown_1 = 1 << 1,
//    e_unknown_2 = 1 << 2,
//    e_unknown_3 = 1 << 3,
//    e_unknown_4 = 1 << 4,
//    e_unknown_5 = 1 << 5,
//    e_unknown_6 = 1 << 6,
//    e_unknown_7 = 1 << 7,
//    e_unknown_8 = 1 << 8,
//    e_unknown_9 = 1 << 9,
//    e_unknown_10 = 1 << 10,
//    e_unknown_11 = 1 << 11,
//    e_unknown_12 = 1 << 12,
//    e_unknown_13 = 1 << 13,
//    e_unknown_14 = 1 << 14,
//    e_unknown_15 = 1 << 15,
//    e_unknown_16 = 1 << 16,
//    e_unknown_17 = 1 << 17,
//    e_unknown_18 = 1 << 18,
//    e_unknown_19 = 1 << 19,
//    e_unknown_20 = 1 << 20,
//    e_unknown_21 = 1 << 21,
//    e_unknown_22 = 1 << 22,
//    e_unknown_23 = 1 << 23,
//    e_flag_field_size_64_bit = 1 << 24, // Not sure what this is
//    e_unknown_25 = 1 << 25,
//    e_unknown_26 = 1 << 26,
//    e_unknown_27 = 1 << 27,
//    e_unknown_28 = 1 << 28,
//    e_unknown_29 = 1 << 29,
//    e_unknown_30 = 1 << 30,
//    e_unknown_31 = 1 << 31
//};
//
//bool check_flag(uint32_t flags, Flag to_check)
//{
//    return (flags & to_check) != 0;
//}
// ------------------------------------------------------------------------------------------------
Token::Token(const char* sbegin, const char* send, TokenType type, unsigned int offset)
    :
#ifdef DEBUG
    contents(sbegin, static_cast<size_t>(send-sbegin)),
#endif
    sbegin(sbegin)
    , send(send)
    , type(type)
    , line(offset)
    , column(BINARY_MARKER)
{
    ai_assert(sbegin);
    ai_assert(send);

    // binary tokens may have zero length because they are sometimes dummies
    // inserted by TokenizeBinary()
    ai_assert(send >= sbegin);
}


namespace {

// ------------------------------------------------------------------------------------------------
// signal tokenization error, this is always unrecoverable. Throws DeadlyImportError.
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset) AI_WONT_RETURN_SUFFIX;
AI_WONT_RETURN void TokenizeError(const std::string& message, unsigned int offset)
{
    throw DeadlyImportError(Util::AddOffset("FBX-Tokenize",message,offset));
}


// ------------------------------------------------------------------------------------------------
uint32_t Offset(const char* begin, const char* cursor)
{
    ai_assert(begin <= cursor);
    return static_cast<uint32_t>(cursor - begin);
}


// ------------------------------------------------------------------------------------------------
void TokenizeError(const std::string& message, const char* begin, const char* cursor)
{
    TokenizeError(message, Offset(begin, cursor));
}


// ------------------------------------------------------------------------------------------------
uint32_t ReadWord(const char* input, const char*& cursor, const char* end)
{
    const size_t k_to_read = sizeof( uint32_t );
    if(Offset(cursor, end) < k_to_read ) {
        TokenizeError("cannot ReadWord, out of bounds",input, cursor);
    }

    uint32_t word;
    memcpy(&word, cursor, k_to_read);
    AI_SWAP4(word);

    cursor += k_to_read;

    return word;
}

// ------------------------------------------------------------------------------------------------
uint64_t ReadDoubleWord(const char* input, const char*& cursor, const char* end) {
    const size_t k_to_read = sizeof(uint64_t);
    if(Offset(cursor, end) < k_to_read) {
        TokenizeError("cannot ReadDoubleWord, out of bounds",input, cursor);
    }

    uint64_t dword;
    memcpy(&dword, cursor, k_to_read); // memcpy avoids an unaligned read through reinterpret_cast
    AI_SWAP8(dword);

    cursor += k_to_read;

    return dword;
}

// ------------------------------------------------------------------------------------------------
uint8_t ReadByte(const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < sizeof( uint8_t ) ) {
        TokenizeError("cannot ReadByte, out of bounds",input, cursor);
    }

    uint8_t word = *reinterpret_cast<const uint8_t*>(cursor);
    ++cursor;

    return word;
}


// ------------------------------------------------------------------------------------------------
unsigned int ReadString(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end,
    bool long_length = false,
    bool allow_null = false)
{
    const uint32_t len_len = long_length ? 4 : 1;
    if(Offset(cursor, end) < len_len) {
        TokenizeError("cannot ReadString, out of bounds reading length",input, cursor);
    }

    const uint32_t length = long_length ? ReadWord(input, cursor, end) : ReadByte(input, cursor, end);

    if (Offset(cursor, end) < length) {
        TokenizeError("cannot ReadString, length is out of bounds",input, cursor);
    }

    sbegin_out = cursor;
    cursor += length;

    send_out = cursor;

    if(!allow_null) {
        for (unsigned int i = 0; i < length; ++i) {
            if(sbegin_out[i] == '\0') {
                TokenizeError("failed ReadString, unexpected NUL character in string",input, cursor);
            }
        }
    }

    return length;
}

// ------------------------------------------------------------------------------------------------
void ReadData(const char*& sbegin_out, const char*& send_out, const char* input, const char*& cursor, const char* end)
{
    if(Offset(cursor, end) < 1) {
        TokenizeError("cannot ReadData, out of bounds reading length",input, cursor);
    }

    const char type = *cursor;
    sbegin_out = cursor++;

    switch(type)
    {
    // 16 bit int
    case 'Y':
        cursor += 2;
        break;

    // 1 bit bool flag (yes/no), stored as a single byte
    case 'C':
        cursor += 1;
        break;

    // 32 bit int
    case 'I':
        // <- fall through

    // float
    case 'F':
        cursor += 4;
        break;

    // double
    case 'D':
        cursor += 8;
        break;

    // 64 bit int
    case 'L':
        cursor += 8;
        break;

    // note: do not write cursor += ReadWord(...cursor) as this would be UB

    // raw binary data
    case 'R':
    {
        const uint32_t length = ReadWord(input, cursor, end);
        cursor += length;
        break;
    }

    case 'b':
        // TODO: what is the 'b' type code? Right now we just skip over it /
        // take the full range we could get
        cursor = end;
        break;

    // array of *
    case 'f':
    case 'd':
    case 'l':
    case 'i': {

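        // array properties begin with three 32-bit words: the element count,
        // the encoding (0 = plain data, 1 = zlib/deflate-compressed) and the
        // byte length of the (possibly compressed) payload that follows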
        const uint32_t length = ReadWord(input, cursor, end);
        const uint32_t encoding = ReadWord(input, cursor, end);

        const uint32_t comp_len = ReadWord(input, cursor, end);

        // compute length based on type and check against the stored value
        if(encoding == 0) {
            uint32_t stride = 0;
            switch(type)
            {
            case 'f':
            case 'i':
                stride = 4;
                break;

            case 'd':
            case 'l':
                stride = 8;
                break;

            default:
                ai_assert(false);
            }
            ai_assert(stride > 0);
            if(length * stride != comp_len) {
                TokenizeError("cannot ReadData, calculated data stride differs from what the file claims",input, cursor);
            }
        }
        // zip/deflate algorithm (encoding==1)? take given length. anything else? die
        else if (encoding != 1) {
            TokenizeError("cannot ReadData, unknown encoding",input, cursor);
        }
        cursor += comp_len;
        break;
    }

    // string
    case 'S': {
        const char* sb, *se;
        // 0 characters can legally happen in such strings
        ReadString(sb, se, input, cursor, end, true, true);
        break;
    }

    default:
        TokenizeError("cannot ReadData, unexpected type code: " + std::string(&type, 1),input, cursor);
    }

    if(cursor > end) {
        TokenizeError("cannot ReadData, the remaining size is too small for the data type: " + std::string(&type, 1),input, cursor);
    }

    // the type code is contained in the returned range
    send_out = cursor;
}


// ------------------------------------------------------------------------------------------------
bool ReadScope(TokenList& output_tokens, const char* input, const char*& cursor, const char* end, bool const is64bits)
{
    // the first word contains the offset at which this block ends
    const uint64_t end_offset = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // we may get 0 if reading reached the end of the file -
    // fbx files have a mysterious extra footer which I don't know
    // how to extract any information from, but at least it always
    // starts with a 0.
    if(!end_offset) {
        return false;
    }

    if(end_offset > Offset(input, end)) {
        TokenizeError("block offset is out of range",input, cursor);
    }
    else if(end_offset < Offset(input, cursor)) {
        TokenizeError("block offset is negative out of range",input, cursor);
    }

    // the second data word contains the number of properties in the scope
    const uint64_t prop_count = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // the third data word contains the length of the property list
    const uint64_t prop_length = is64bits ? ReadDoubleWord(input, cursor, end) : ReadWord(input, cursor, end);

    // now comes the name of the scope/key
    const char* sbeg, *send;
    ReadString(sbeg, send, input, cursor, end);

    output_tokens.push_back(new_Token(sbeg, send, TokenType_KEY, Offset(input, cursor) ));

    // now come the individual properties
    const char* begin_cursor = cursor;
    for (unsigned int i = 0; i < prop_count; ++i) {
        ReadData(sbeg, send, input, cursor, begin_cursor + prop_length);

        output_tokens.push_back(new_Token(sbeg, send, TokenType_DATA, Offset(input, cursor) ));

        if(i != prop_count-1) {
            output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_COMMA, Offset(input, cursor) ));
        }
    }

    if (Offset(begin_cursor, cursor) != prop_length) {
        TokenizeError("property length not reached, something is wrong",input, cursor);
    }

    // at the end of each nested block, there is a NUL record to indicate
    // that the sub-scope exists (i.e. to distinguish between P: and P : {})
    // this NUL record is 13 bytes long on 32 bit version and 25 bytes long on 64 bit.
    const size_t sentinel_block_length = is64bits ? (sizeof(uint64_t)* 3 + 1) : (sizeof(uint32_t)* 3 + 1);

    if (Offset(input, cursor) < end_offset) {
        if (end_offset - Offset(input, cursor) < sentinel_block_length) {
            TokenizeError("insufficient padding bytes at block end",input, cursor);
        }

        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_OPEN_BRACKET, Offset(input, cursor) ));

        // XXX this is vulnerable to stack overflowing ..
        while(Offset(input, cursor) < end_offset - sentinel_block_length) {
            ReadScope(output_tokens, input, cursor, input + end_offset - sentinel_block_length, is64bits);
        }
        output_tokens.push_back(new_Token(cursor, cursor + 1, TokenType_CLOSE_BRACKET, Offset(input, cursor) ));

        for (unsigned int i = 0; i < sentinel_block_length; ++i) {
            if(cursor[i] != '\0') {
                TokenizeError("failed to read nested block sentinel, expected all bytes to be 0",input, cursor);
            }
        }
        cursor += sentinel_block_length;
    }

    if (Offset(input, cursor) != end_offset) {
        TokenizeError("scope length not reached, something is wrong",input, cursor);
    }

    return true;
}

} // anonymous namespace

// ------------------------------------------------------------------------------------------------
// TODO: Test FBX binary files newer than version 7500 to check whether the 64-bit address behaviour is consistent
void TokenizeBinary(TokenList& output_tokens, const char* input, unsigned int length)
{
    ai_assert(input);

    if(length < 0x1b) {
        TokenizeError("file is too short",0);
    }

    if (strncmp(input,"Kaydara FBX Binary",18)) {
        TokenizeError("magic bytes not found",0);
    }

    const char* cursor = input + 18;
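    // the 18-character magic string is followed by 5 more header bytes and a
    // 4-byte version word; together these account for the 0x1b (27) byte
    // minimum length checked above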
    /*Result ignored*/ ReadByte(input, cursor, input + length);
    /*Result ignored*/ ReadByte(input, cursor, input + length);
    /*Result ignored*/ ReadByte(input, cursor, input + length);
    /*Result ignored*/ ReadByte(input, cursor, input + length);
    /*Result ignored*/ ReadByte(input, cursor, input + length);
    const uint32_t version = ReadWord(input, cursor, input + length);
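    // files with version >= 7500 (FBX 2016 and newer) store the scope record
    // header fields (end offset, property count, property list length) as
    // 64-bit values rather than 32-bit ones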
    const bool is64bits = version >= 7500;
    while (cursor < input + length)
    {
        if (!ReadScope(output_tokens, input, cursor, input + length, is64bits)) {
            break;
        }
    }
}

} // !FBX
} // !Assimp

#endif