1 | /* |
2 | * .xz Stream decoder |
3 | * |
4 | * Author: Lasse Collin <lasse.collin@tukaani.org> |
5 | * |
6 | * This file has been put into the public domain. |
7 | * You can do whatever you want with this file. |
8 | */ |
9 | |
10 | #include "xz_private.h" |
11 | #include "xz_stream.h" |
12 | |
13 | /* Hash used to validate the Index field */ |
struct xz_dec_hash {
	/* Sum of the Unpadded Sizes hashed so far */
	vli_type unpadded;

	/* Sum of the Uncompressed Sizes hashed so far */
	vli_type uncompressed;

	/* Running CRC32 folded over this struct's own contents */
	uint32_t crc32;
};
19 | |
20 | struct xz_dec { |
21 | /* Position in dec_main() */ |
22 | enum { |
23 | , |
24 | SEQ_BLOCK_START, |
25 | , |
26 | SEQ_BLOCK_UNCOMPRESS, |
27 | SEQ_BLOCK_PADDING, |
28 | SEQ_BLOCK_CHECK, |
29 | SEQ_INDEX, |
30 | SEQ_INDEX_PADDING, |
31 | SEQ_INDEX_CRC32, |
32 | |
33 | } sequence; |
34 | |
35 | /* Position in variable-length integers and Check fields */ |
36 | uint32_t pos; |
37 | |
38 | /* Variable-length integer decoded by dec_vli() */ |
39 | vli_type vli; |
40 | |
41 | /* Saved in_pos and out_pos */ |
42 | size_t in_start; |
43 | size_t out_start; |
44 | |
45 | /* CRC32 value in Block or Index */ |
46 | uint32_t crc32; |
47 | |
48 | /* Type of the integrity check calculated from uncompressed data */ |
49 | enum xz_check check_type; |
50 | |
51 | /* Operation mode */ |
52 | enum xz_mode mode; |
53 | |
54 | /* |
55 | * True if the next call to xz_dec_run() is allowed to return |
56 | * XZ_BUF_ERROR. |
57 | */ |
58 | bool allow_buf_error; |
59 | |
60 | /* Information stored in Block Header */ |
61 | struct { |
62 | /* |
63 | * Value stored in the Compressed Size field, or |
64 | * VLI_UNKNOWN if Compressed Size is not present. |
65 | */ |
66 | vli_type compressed; |
67 | |
68 | /* |
69 | * Value stored in the Uncompressed Size field, or |
70 | * VLI_UNKNOWN if Uncompressed Size is not present. |
71 | */ |
72 | vli_type uncompressed; |
73 | |
74 | /* Size of the Block Header field */ |
75 | uint32_t size; |
76 | } ; |
77 | |
78 | /* Information collected when decoding Blocks */ |
79 | struct { |
80 | /* Observed compressed size of the current Block */ |
81 | vli_type compressed; |
82 | |
83 | /* Observed uncompressed size of the current Block */ |
84 | vli_type uncompressed; |
85 | |
86 | /* Number of Blocks decoded so far */ |
87 | vli_type count; |
88 | |
89 | /* |
90 | * Hash calculated from the Block sizes. This is used to |
91 | * validate the Index field. |
92 | */ |
93 | struct xz_dec_hash hash; |
94 | } block; |
95 | |
96 | /* Variables needed when verifying the Index field */ |
97 | struct { |
98 | /* Position in dec_index() */ |
99 | enum { |
100 | SEQ_INDEX_COUNT, |
101 | SEQ_INDEX_UNPADDED, |
102 | SEQ_INDEX_UNCOMPRESSED |
103 | } sequence; |
104 | |
105 | /* Size of the Index in bytes */ |
106 | vli_type size; |
107 | |
108 | /* Number of Records (matches block.count in valid files) */ |
109 | vli_type count; |
110 | |
111 | /* |
112 | * Hash calculated from the Records (matches block.hash in |
113 | * valid files). |
114 | */ |
115 | struct xz_dec_hash hash; |
116 | } index; |
117 | |
118 | /* |
119 | * Temporary buffer needed to hold Stream Header, Block Header, |
120 | * and Stream Footer. The Block Header is the biggest (1 KiB) |
121 | * so we reserve space according to that. buf[] has to be aligned |
122 | * to a multiple of four bytes; the size_t variables before it |
123 | * should guarantee this. |
124 | */ |
125 | struct { |
126 | size_t pos; |
127 | size_t size; |
128 | uint8_t buf[1024]; |
129 | } temp; |
130 | |
131 | struct xz_dec_lzma2 *lzma2; |
132 | |
133 | #ifdef XZ_DEC_BCJ |
134 | struct xz_dec_bcj *bcj; |
135 | bool bcj_active; |
136 | #endif |
137 | }; |
138 | |
139 | #ifdef XZ_DEC_ANY_CHECK |
/*
 * Sizes of the Check field with different Check IDs (0-15).
 * ID 0 = None; IDs 1-3 use 4 bytes (CRC32); 4-6 use 8 bytes (CRC64);
 * 7-9 use 16 bytes; 10-12 use 32 bytes; 13-15 use 64 bytes (SHA-256 etc.).
 * Only the sizes matter here: unsupported checks are skipped, not verified.
 */
static const uint8_t check_sizes[16] = {
	0,
	4, 4, 4,
	8, 8, 8,
	16, 16, 16,
	32, 32, 32,
	64, 64, 64
};
149 | #endif |
150 | |
151 | /* |
152 | * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller |
153 | * must have set s->temp.pos to indicate how much data we are supposed |
154 | * to copy into s->temp.buf. Return true once s->temp.pos has reached |
155 | * s->temp.size. |
156 | */ |
157 | static bool fill_temp(struct xz_dec *s, struct xz_buf *b) |
158 | { |
159 | size_t copy_size = min_t(size_t, |
160 | b->in_size - b->in_pos, s->temp.size - s->temp.pos); |
161 | |
162 | memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); |
163 | b->in_pos += copy_size; |
164 | s->temp.pos += copy_size; |
165 | |
166 | if (s->temp.pos == s->temp.size) { |
167 | s->temp.pos = 0; |
168 | return true; |
169 | } |
170 | |
171 | return false; |
172 | } |
173 | |
174 | /* Decode a variable-length integer (little-endian base-128 encoding) */ |
175 | static enum xz_ret dec_vli(struct xz_dec *s, const uint8_t *in, |
176 | size_t *in_pos, size_t in_size) |
177 | { |
178 | uint8_t byte; |
179 | |
180 | if (s->pos == 0) |
181 | s->vli = 0; |
182 | |
183 | while (*in_pos < in_size) { |
184 | byte = in[*in_pos]; |
185 | ++*in_pos; |
186 | |
187 | s->vli |= (vli_type)(byte & 0x7F) << s->pos; |
188 | |
189 | if ((byte & 0x80) == 0) { |
190 | /* Don't allow non-minimal encodings. */ |
191 | if (byte == 0 && s->pos != 0) |
192 | return XZ_DATA_ERROR; |
193 | |
194 | s->pos = 0; |
195 | return XZ_STREAM_END; |
196 | } |
197 | |
198 | s->pos += 7; |
199 | if (s->pos == 7 * VLI_BYTES_MAX) |
200 | return XZ_DATA_ERROR; |
201 | } |
202 | |
203 | return XZ_OK; |
204 | } |
205 | |
206 | /* |
207 | * Decode the Compressed Data field from a Block. Update and validate |
208 | * the observed compressed and uncompressed sizes of the Block so that |
209 | * they don't exceed the values possibly stored in the Block Header |
210 | * (validation assumes that no integer overflow occurs, since vli_type |
211 | * is normally uint64_t). Update the CRC32 if presence of the CRC32 |
212 | * field was indicated in Stream Header. |
213 | * |
214 | * Once the decoding is finished, validate that the observed sizes match |
215 | * the sizes possibly stored in the Block Header. Update the hash and |
216 | * Block count, which are later used to validate the Index field. |
217 | */ |
218 | static enum xz_ret dec_block(struct xz_dec *s, struct xz_buf *b) |
219 | { |
220 | enum xz_ret ret; |
221 | |
222 | s->in_start = b->in_pos; |
223 | s->out_start = b->out_pos; |
224 | |
225 | #ifdef XZ_DEC_BCJ |
226 | if (s->bcj_active) |
227 | ret = xz_dec_bcj_run(s: s->bcj, lzma2: s->lzma2, b); |
228 | else |
229 | #endif |
230 | ret = xz_dec_lzma2_run(s: s->lzma2, b); |
231 | |
232 | s->block.compressed += b->in_pos - s->in_start; |
233 | s->block.uncompressed += b->out_pos - s->out_start; |
234 | |
235 | /* |
236 | * There is no need to separately check for VLI_UNKNOWN, since |
237 | * the observed sizes are always smaller than VLI_UNKNOWN. |
238 | */ |
239 | if (s->block.compressed > s->block_header.compressed |
240 | || s->block.uncompressed |
241 | > s->block_header.uncompressed) |
242 | return XZ_DATA_ERROR; |
243 | |
244 | if (s->check_type == XZ_CHECK_CRC32) |
245 | s->crc32 = xz_crc32(b->out + s->out_start, |
246 | b->out_pos - s->out_start, s->crc32); |
247 | |
248 | if (ret == XZ_STREAM_END) { |
249 | if (s->block_header.compressed != VLI_UNKNOWN |
250 | && s->block_header.compressed |
251 | != s->block.compressed) |
252 | return XZ_DATA_ERROR; |
253 | |
254 | if (s->block_header.uncompressed != VLI_UNKNOWN |
255 | && s->block_header.uncompressed |
256 | != s->block.uncompressed) |
257 | return XZ_DATA_ERROR; |
258 | |
259 | s->block.hash.unpadded += s->block_header.size |
260 | + s->block.compressed; |
261 | |
262 | #ifdef XZ_DEC_ANY_CHECK |
263 | s->block.hash.unpadded += check_sizes[s->check_type]; |
264 | #else |
265 | if (s->check_type == XZ_CHECK_CRC32) |
266 | s->block.hash.unpadded += 4; |
267 | #endif |
268 | |
269 | s->block.hash.uncompressed += s->block.uncompressed; |
270 | s->block.hash.crc32 = xz_crc32( |
271 | (const uint8_t *)&s->block.hash, |
272 | sizeof(s->block.hash), s->block.hash.crc32); |
273 | |
274 | ++s->block.count; |
275 | } |
276 | |
277 | return ret; |
278 | } |
279 | |
280 | /* Update the Index size and the CRC32 value. */ |
281 | static void index_update(struct xz_dec *s, const struct xz_buf *b) |
282 | { |
283 | size_t in_used = b->in_pos - s->in_start; |
284 | s->index.size += in_used; |
285 | s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); |
286 | } |
287 | |
288 | /* |
289 | * Decode the Number of Records, Unpadded Size, and Uncompressed Size |
290 | * fields from the Index field. That is, Index Padding and CRC32 are not |
291 | * decoded by this function. |
292 | * |
293 | * This can return XZ_OK (more input needed), XZ_STREAM_END (everything |
294 | * successfully decoded), or XZ_DATA_ERROR (input is corrupt). |
295 | */ |
296 | static enum xz_ret dec_index(struct xz_dec *s, struct xz_buf *b) |
297 | { |
298 | enum xz_ret ret; |
299 | |
300 | do { |
301 | ret = dec_vli(s, in: b->in, in_pos: &b->in_pos, in_size: b->in_size); |
302 | if (ret != XZ_STREAM_END) { |
303 | index_update(s, b); |
304 | return ret; |
305 | } |
306 | |
307 | switch (s->index.sequence) { |
308 | case SEQ_INDEX_COUNT: |
309 | s->index.count = s->vli; |
310 | |
311 | /* |
312 | * Validate that the Number of Records field |
313 | * indicates the same number of Records as |
314 | * there were Blocks in the Stream. |
315 | */ |
316 | if (s->index.count != s->block.count) |
317 | return XZ_DATA_ERROR; |
318 | |
319 | s->index.sequence = SEQ_INDEX_UNPADDED; |
320 | break; |
321 | |
322 | case SEQ_INDEX_UNPADDED: |
323 | s->index.hash.unpadded += s->vli; |
324 | s->index.sequence = SEQ_INDEX_UNCOMPRESSED; |
325 | break; |
326 | |
327 | case SEQ_INDEX_UNCOMPRESSED: |
328 | s->index.hash.uncompressed += s->vli; |
329 | s->index.hash.crc32 = xz_crc32( |
330 | (const uint8_t *)&s->index.hash, |
331 | sizeof(s->index.hash), |
332 | s->index.hash.crc32); |
333 | --s->index.count; |
334 | s->index.sequence = SEQ_INDEX_UNPADDED; |
335 | break; |
336 | } |
337 | } while (s->index.count > 0); |
338 | |
339 | return XZ_STREAM_END; |
340 | } |
341 | |
342 | /* |
343 | * Validate that the next four input bytes match the value of s->crc32. |
344 | * s->pos must be zero when starting to validate the first byte. |
345 | */ |
346 | static enum xz_ret crc32_validate(struct xz_dec *s, struct xz_buf *b) |
347 | { |
348 | do { |
349 | if (b->in_pos == b->in_size) |
350 | return XZ_OK; |
351 | |
352 | if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) |
353 | return XZ_DATA_ERROR; |
354 | |
355 | s->pos += 8; |
356 | |
357 | } while (s->pos < 32); |
358 | |
359 | s->crc32 = 0; |
360 | s->pos = 0; |
361 | |
362 | return XZ_STREAM_END; |
363 | } |
364 | |
365 | #ifdef XZ_DEC_ANY_CHECK |
366 | /* |
367 | * Skip over the Check field when the Check ID is not supported. |
368 | * Returns true once the whole Check field has been skipped over. |
369 | */ |
370 | static bool check_skip(struct xz_dec *s, struct xz_buf *b) |
371 | { |
372 | while (s->pos < check_sizes[s->check_type]) { |
373 | if (b->in_pos == b->in_size) |
374 | return false; |
375 | |
376 | ++b->in_pos; |
377 | ++s->pos; |
378 | } |
379 | |
380 | s->pos = 0; |
381 | |
382 | return true; |
383 | } |
384 | #endif |
385 | |
386 | /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ |
387 | static enum xz_ret (struct xz_dec *s) |
388 | { |
389 | if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) |
390 | return XZ_FORMAT_ERROR; |
391 | |
392 | if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) |
393 | != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) |
394 | return XZ_DATA_ERROR; |
395 | |
396 | if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) |
397 | return XZ_OPTIONS_ERROR; |
398 | |
399 | /* |
400 | * Of integrity checks, we support only none (Check ID = 0) and |
401 | * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, |
402 | * we will accept other check types too, but then the check won't |
403 | * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. |
404 | */ |
405 | if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX) |
406 | return XZ_OPTIONS_ERROR; |
407 | |
408 | s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; |
409 | |
410 | #ifdef XZ_DEC_ANY_CHECK |
411 | if (s->check_type > XZ_CHECK_CRC32) |
412 | return XZ_UNSUPPORTED_CHECK; |
413 | #else |
414 | if (s->check_type > XZ_CHECK_CRC32) |
415 | return XZ_OPTIONS_ERROR; |
416 | #endif |
417 | |
418 | return XZ_OK; |
419 | } |
420 | |
421 | /* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ |
422 | static enum xz_ret (struct xz_dec *s) |
423 | { |
424 | if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) |
425 | return XZ_DATA_ERROR; |
426 | |
427 | if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) |
428 | return XZ_DATA_ERROR; |
429 | |
430 | /* |
431 | * Validate Backward Size. Note that we never added the size of the |
432 | * Index CRC32 field to s->index.size, thus we use s->index.size / 4 |
433 | * instead of s->index.size / 4 - 1. |
434 | */ |
435 | if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) |
436 | return XZ_DATA_ERROR; |
437 | |
438 | if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) |
439 | return XZ_DATA_ERROR; |
440 | |
441 | /* |
442 | * Use XZ_STREAM_END instead of XZ_OK to be more convenient |
443 | * for the caller. |
444 | */ |
445 | return XZ_STREAM_END; |
446 | } |
447 | |
448 | /* Decode the Block Header and initialize the filter chain. */ |
449 | static enum xz_ret (struct xz_dec *s) |
450 | { |
451 | enum xz_ret ret; |
452 | |
453 | /* |
454 | * Validate the CRC32. We know that the temp buffer is at least |
455 | * eight bytes so this is safe. |
456 | */ |
457 | s->temp.size -= 4; |
458 | if (xz_crc32(s->temp.buf, s->temp.size, 0) |
459 | != get_le32(s->temp.buf + s->temp.size)) |
460 | return XZ_DATA_ERROR; |
461 | |
462 | s->temp.pos = 2; |
463 | |
464 | /* |
465 | * Catch unsupported Block Flags. We support only one or two filters |
466 | * in the chain, so we catch that with the same test. |
467 | */ |
468 | #ifdef XZ_DEC_BCJ |
469 | if (s->temp.buf[1] & 0x3E) |
470 | #else |
471 | if (s->temp.buf[1] & 0x3F) |
472 | #endif |
473 | return XZ_OPTIONS_ERROR; |
474 | |
475 | /* Compressed Size */ |
476 | if (s->temp.buf[1] & 0x40) { |
477 | if (dec_vli(s, in: s->temp.buf, in_pos: &s->temp.pos, in_size: s->temp.size) |
478 | != XZ_STREAM_END) |
479 | return XZ_DATA_ERROR; |
480 | |
481 | s->block_header.compressed = s->vli; |
482 | } else { |
483 | s->block_header.compressed = VLI_UNKNOWN; |
484 | } |
485 | |
486 | /* Uncompressed Size */ |
487 | if (s->temp.buf[1] & 0x80) { |
488 | if (dec_vli(s, in: s->temp.buf, in_pos: &s->temp.pos, in_size: s->temp.size) |
489 | != XZ_STREAM_END) |
490 | return XZ_DATA_ERROR; |
491 | |
492 | s->block_header.uncompressed = s->vli; |
493 | } else { |
494 | s->block_header.uncompressed = VLI_UNKNOWN; |
495 | } |
496 | |
497 | #ifdef XZ_DEC_BCJ |
498 | /* If there are two filters, the first one must be a BCJ filter. */ |
499 | s->bcj_active = s->temp.buf[1] & 0x01; |
500 | if (s->bcj_active) { |
501 | if (s->temp.size - s->temp.pos < 2) |
502 | return XZ_OPTIONS_ERROR; |
503 | |
504 | ret = xz_dec_bcj_reset(s: s->bcj, id: s->temp.buf[s->temp.pos++]); |
505 | if (ret != XZ_OK) |
506 | return ret; |
507 | |
508 | /* |
509 | * We don't support custom start offset, |
510 | * so Size of Properties must be zero. |
511 | */ |
512 | if (s->temp.buf[s->temp.pos++] != 0x00) |
513 | return XZ_OPTIONS_ERROR; |
514 | } |
515 | #endif |
516 | |
517 | /* Valid Filter Flags always take at least two bytes. */ |
518 | if (s->temp.size - s->temp.pos < 2) |
519 | return XZ_DATA_ERROR; |
520 | |
521 | /* Filter ID = LZMA2 */ |
522 | if (s->temp.buf[s->temp.pos++] != 0x21) |
523 | return XZ_OPTIONS_ERROR; |
524 | |
525 | /* Size of Properties = 1-byte Filter Properties */ |
526 | if (s->temp.buf[s->temp.pos++] != 0x01) |
527 | return XZ_OPTIONS_ERROR; |
528 | |
529 | /* Filter Properties contains LZMA2 dictionary size. */ |
530 | if (s->temp.size - s->temp.pos < 1) |
531 | return XZ_DATA_ERROR; |
532 | |
533 | ret = xz_dec_lzma2_reset(s: s->lzma2, props: s->temp.buf[s->temp.pos++]); |
534 | if (ret != XZ_OK) |
535 | return ret; |
536 | |
537 | /* The rest must be Header Padding. */ |
538 | while (s->temp.pos < s->temp.size) |
539 | if (s->temp.buf[s->temp.pos++] != 0x00) |
540 | return XZ_OPTIONS_ERROR; |
541 | |
542 | s->temp.pos = 0; |
543 | s->block.compressed = 0; |
544 | s->block.uncompressed = 0; |
545 | |
546 | return XZ_OK; |
547 | } |
548 | |
/*
 * Top-level decoder state machine. Decodes as much of the .xz Stream as
 * the input/output buffers allow; s->sequence records where to resume on
 * the next call. Returns XZ_OK when more input is needed, XZ_STREAM_END
 * on a successfully finished Stream, or an error code.
 */
static enum xz_ret dec_main(struct xz_dec *s, struct xz_buf *b)
{
	enum xz_ret ret;

	/*
	 * Store the start position for the case when we are in the middle
	 * of the Index field.
	 */
	s->in_start = b->in_pos;

	while (true) {
		switch (s->sequence) {
		case SEQ_STREAM_HEADER:
			/*
			 * Stream Header is copied to s->temp, and then
			 * decoded from there. This way if the caller
			 * gives us only little input at a time, we can
			 * still keep the Stream Header decoding code
			 * simple. Similar approach is used in many places
			 * in this file.
			 */
			if (!fill_temp(s, b))
				return XZ_OK;

			/*
			 * If dec_stream_header() returns
			 * XZ_UNSUPPORTED_CHECK, it is still possible
			 * to continue decoding if working in multi-call
			 * mode. Thus, update s->sequence before calling
			 * dec_stream_header().
			 */
			s->sequence = SEQ_BLOCK_START;

			ret = dec_stream_header(s);
			if (ret != XZ_OK)
				return ret;

			fallthrough;

		case SEQ_BLOCK_START:
			/* We need one byte of input to continue. */
			if (b->in_pos == b->in_size)
				return XZ_OK;

			/* See if this is the beginning of the Index field. */
			if (b->in[b->in_pos] == 0) {
				s->in_start = b->in_pos++;
				s->sequence = SEQ_INDEX;
				break;
			}

			/*
			 * Calculate the size of the Block Header and
			 * prepare to decode it.
			 */
			s->block_header.size
				= ((uint32_t)b->in[b->in_pos] + 1) * 4;

			s->temp.size = s->block_header.size;
			s->temp.pos = 0;
			s->sequence = SEQ_BLOCK_HEADER;

			fallthrough;

		case SEQ_BLOCK_HEADER:
			if (!fill_temp(s, b))
				return XZ_OK;

			ret = dec_block_header(s);
			if (ret != XZ_OK)
				return ret;

			s->sequence = SEQ_BLOCK_UNCOMPRESS;

			fallthrough;

		case SEQ_BLOCK_UNCOMPRESS:
			ret = dec_block(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_BLOCK_PADDING;

			fallthrough;

		case SEQ_BLOCK_PADDING:
			/*
			 * Size of Compressed Data + Block Padding
			 * must be a multiple of four. We don't need
			 * s->block.compressed for anything else
			 * anymore, so we use it here to test the size
			 * of the Block Padding field.
			 */
			while (s->block.compressed & 3) {
				if (b->in_pos == b->in_size)
					return XZ_OK;

				/* Padding bytes must all be zero. */
				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;

				++s->block.compressed;
			}

			s->sequence = SEQ_BLOCK_CHECK;

			fallthrough;

		case SEQ_BLOCK_CHECK:
			if (s->check_type == XZ_CHECK_CRC32) {
				ret = crc32_validate(s, b);
				if (ret != XZ_STREAM_END)
					return ret;
			}
#ifdef XZ_DEC_ANY_CHECK
			else if (!check_skip(s, b)) {
				return XZ_OK;
			}
#endif

			s->sequence = SEQ_BLOCK_START;
			break;

		case SEQ_INDEX:
			ret = dec_index(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->sequence = SEQ_INDEX_PADDING;

			fallthrough;

		case SEQ_INDEX_PADDING:
			/* Index Padding makes the Index a multiple of four
			 * bytes; s->in_start marks input not yet added to
			 * s->index.size by index_update(). */
			while ((s->index.size + (b->in_pos - s->in_start))
					& 3) {
				if (b->in_pos == b->in_size) {
					index_update(s, b);
					return XZ_OK;
				}

				if (b->in[b->in_pos++] != 0)
					return XZ_DATA_ERROR;
			}

			/* Finish the CRC32 value and Index size. */
			index_update(s, b);

			/* Compare the hashes to validate the Index field. */
			if (!memeq(&s->block.hash, &s->index.hash,
					sizeof(s->block.hash)))
				return XZ_DATA_ERROR;

			s->sequence = SEQ_INDEX_CRC32;

			fallthrough;

		case SEQ_INDEX_CRC32:
			ret = crc32_validate(s, b);
			if (ret != XZ_STREAM_END)
				return ret;

			s->temp.size = STREAM_HEADER_SIZE;
			s->sequence = SEQ_STREAM_FOOTER;

			fallthrough;

		case SEQ_STREAM_FOOTER:
			if (!fill_temp(s, b))
				return XZ_OK;

			return dec_stream_footer(s);
		}
	}

	/* Never reached */
}
724 | |
725 | /* |
726 | * xz_dec_run() is a wrapper for dec_main() to handle some special cases in |
727 | * multi-call and single-call decoding. |
728 | * |
729 | * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we |
730 | * are not going to make any progress anymore. This is to prevent the caller |
731 | * from calling us infinitely when the input file is truncated or otherwise |
732 | * corrupt. Since zlib-style API allows that the caller fills the input buffer |
733 | * only when the decoder doesn't produce any new output, we have to be careful |
734 | * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only |
735 | * after the second consecutive call to xz_dec_run() that makes no progress. |
736 | * |
737 | * In single-call mode, if we couldn't decode everything and no error |
738 | * occurred, either the input is truncated or the output buffer is too small. |
739 | * Since we know that the last input byte never produces any output, we know |
740 | * that if all the input was consumed and decoding wasn't finished, the file |
741 | * must be corrupt. Otherwise the output buffer has to be too small or the |
742 | * file is corrupt in a way that decoding it produces too big output. |
743 | * |
744 | * If single-call decoding fails, we reset b->in_pos and b->out_pos back to |
745 | * their original values. This is because with some filter chains there won't |
746 | * be any valid uncompressed data in the output buffer unless the decoding |
747 | * actually succeeds (that's the price to pay of using the output buffer as |
748 | * the workspace). |
749 | */ |
750 | XZ_EXTERN enum xz_ret xz_dec_run(struct xz_dec *s, struct xz_buf *b) |
751 | { |
752 | size_t in_start; |
753 | size_t out_start; |
754 | enum xz_ret ret; |
755 | |
756 | if (DEC_IS_SINGLE(s->mode)) |
757 | xz_dec_reset(s); |
758 | |
759 | in_start = b->in_pos; |
760 | out_start = b->out_pos; |
761 | ret = dec_main(s, b); |
762 | |
763 | if (DEC_IS_SINGLE(s->mode)) { |
764 | if (ret == XZ_OK) |
765 | ret = b->in_pos == b->in_size |
766 | ? XZ_DATA_ERROR : XZ_BUF_ERROR; |
767 | |
768 | if (ret != XZ_STREAM_END) { |
769 | b->in_pos = in_start; |
770 | b->out_pos = out_start; |
771 | } |
772 | |
773 | } else if (ret == XZ_OK && in_start == b->in_pos |
774 | && out_start == b->out_pos) { |
775 | if (s->allow_buf_error) |
776 | ret = XZ_BUF_ERROR; |
777 | |
778 | s->allow_buf_error = true; |
779 | } else { |
780 | s->allow_buf_error = false; |
781 | } |
782 | |
783 | return ret; |
784 | } |
785 | |
786 | XZ_EXTERN struct xz_dec *xz_dec_init(enum xz_mode mode, uint32_t dict_max) |
787 | { |
788 | struct xz_dec *s = kmalloc(size: sizeof(*s), GFP_KERNEL); |
789 | if (s == NULL) |
790 | return NULL; |
791 | |
792 | s->mode = mode; |
793 | |
794 | #ifdef XZ_DEC_BCJ |
795 | s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); |
796 | if (s->bcj == NULL) |
797 | goto error_bcj; |
798 | #endif |
799 | |
800 | s->lzma2 = xz_dec_lzma2_create(mode, dict_max); |
801 | if (s->lzma2 == NULL) |
802 | goto error_lzma2; |
803 | |
804 | xz_dec_reset(s); |
805 | return s; |
806 | |
807 | error_lzma2: |
808 | #ifdef XZ_DEC_BCJ |
809 | xz_dec_bcj_end(s->bcj); |
810 | error_bcj: |
811 | #endif |
812 | kfree(objp: s); |
813 | return NULL; |
814 | } |
815 | |
816 | XZ_EXTERN void xz_dec_reset(struct xz_dec *s) |
817 | { |
818 | s->sequence = SEQ_STREAM_HEADER; |
819 | s->allow_buf_error = false; |
820 | s->pos = 0; |
821 | s->crc32 = 0; |
822 | memzero(&s->block, sizeof(s->block)); |
823 | memzero(&s->index, sizeof(s->index)); |
824 | s->temp.pos = 0; |
825 | s->temp.size = STREAM_HEADER_SIZE; |
826 | } |
827 | |
828 | XZ_EXTERN void xz_dec_end(struct xz_dec *s) |
829 | { |
830 | if (s != NULL) { |
831 | xz_dec_lzma2_end(s: s->lzma2); |
832 | #ifdef XZ_DEC_BCJ |
833 | xz_dec_bcj_end(s->bcj); |
834 | #endif |
835 | kfree(objp: s); |
836 | } |
837 | } |
838 | |