1 | /* |
2 | Copyright (c) 2015-2016, Apple Inc. All rights reserved. |
3 | |
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: |
5 | |
6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. |
7 | |
8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer |
9 | in the documentation and/or other materials provided with the distribution. |
10 | |
11 | 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived |
12 | from this software without specific prior written permission. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
16 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
18 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
19 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
20 | */ |
21 | |
22 | #include "lzfse_internal.h" |
23 | #include "lzvn_decode_base.h" |
24 | |
25 | /*! @abstract Decode an entry value from next bits of stream. |
26 | * Return \p value, and set \p *nbits to the number of bits to consume |
27 | * (starting with LSB). */ |
static inline int lzfse_decode_v1_freq_value(uint32_t bits, int *nbits) {
  // Width (in bits) of the encoded entry, indexed by the low 5 bits of the
  // stream. Widths 8 and 14 are escapes for values too large to fit the
  // short codes.
  static const int8_t lzfse_freq_nbits_table[32] = {
      2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14,
      2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14};
  // Decoded value for each short (<= 5 bit) code; escape slots hold -1 and
  // are never returned.
  static const int8_t lzfse_freq_value_table[32] = {
      0, 2, 1, 4, 0, 3, 1, -1, 0, 2, 1, 5, 0, 3, 1, -1,
      0, 2, 1, 6, 0, 3, 1, -1, 0, 2, 1, 7, 0, 3, 1, -1};

  const uint32_t idx = bits & 0x1f; // lower 5 bits select the table entry
  const int n = lzfse_freq_nbits_table[idx];
  *nbits = n;

  switch (n) {
  case 8:
    // 8-bit escape: 4 payload bits starting at bit 4; values 8..23.
    return 8 + (int)((bits >> 4) & 0xf);
  case 14:
    // 14-bit escape: 10 payload bits starting at bit 4; values 24..1047.
    return 24 + (int)((bits >> 4) & 0x3ff);
  default:
    // Short encoding: value comes straight from the table.
    return lzfse_freq_value_table[idx];
  }
}
49 | |
50 | /*! @abstract Extracts up to 32 bits from a 64-bit field beginning at |
51 | * \p offset, and zero-extends them to a \p uint32_t. |
52 | * |
53 | * If we number the bits of \p v from 0 (least significant) to 63 (most |
54 | * significant), the result is bits \p offset to \p offset+nbits-1. */ |
static inline uint32_t get_field(uint64_t v, int offset, int nbits) {
  assert(offset + nbits < 64 && offset >= 0 && nbits <= 32);
  const uint64_t shifted = v >> offset;
  // Full-width extraction cannot be expressed with the mask below, so it is
  // handled separately.
  if (nbits == 32)
    return (uint32_t)shifted;
  const uint32_t mask = (1u << nbits) - 1;
  return (uint32_t)shifted & mask;
}
61 | |
62 | /*! @abstract Return \c header_size field from a \c lzfse_compressed_block_header_v2. */ |
63 | static inline uint32_t |
64 | (const lzfse_compressed_block_header_v2 *in) { |
65 | return get_field(in->packed_fields[2], 0, 32); |
66 | } |
67 | |
68 | /*! @abstract Decode all fields from a \c lzfse_compressed_block_header_v2 to a |
69 | * \c lzfse_compressed_block_header_v1. |
70 | * @return 0 on success. |
71 | * @return -1 on failure. */ |
static inline int lzfse_decode_v1(lzfse_compressed_block_header_v1 *out,
                                  const lzfse_compressed_block_header_v2 *in) {
  // Clear all fields
  memset(out, 0x00, sizeof(lzfse_compressed_block_header_v1));

  // The v2 header packs most v1 fields into three 64-bit words.
  uint64_t v0 = in->packed_fields[0];
  uint64_t v1 = in->packed_fields[1];
  uint64_t v2 = in->packed_fields[2];

  out->magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC;
  out->n_raw_bytes = in->n_raw_bytes;

  // Literal state
  // v0: bits 0..19 n_literals, 20..39 n_literal_payload_bytes,
  //     60..62 literal_bits stored with a +7 bias.
  // v1: bits 0..39 hold the four 10-bit literal FSE states.
  out->n_literals = get_field(v0, 0, 20);
  out->n_literal_payload_bytes = get_field(v0, 20, 20);
  out->literal_bits = (int)get_field(v0, 60, 3) - 7;
  out->literal_state[0] = get_field(v1, 0, 10);
  out->literal_state[1] = get_field(v1, 10, 10);
  out->literal_state[2] = get_field(v1, 20, 10);
  out->literal_state[3] = get_field(v1, 30, 10);

  // L,M,D state
  // v0: bits 40..59 n_matches.
  // v1: bits 40..59 n_lmd_payload_bytes, 60..62 lmd_bits (+7 bias).
  // v2: bits 32..61 hold the three 10-bit L/M/D FSE states
  //     (bits 0..31 are the header size, read below).
  out->n_matches = get_field(v0, 40, 20);
  out->n_lmd_payload_bytes = get_field(v1, 40, 20);
  out->lmd_bits = (int)get_field(v1, 60, 3) - 7;
  out->l_state = get_field(v2, 32, 10);
  out->m_state = get_field(v2, 42, 10);
  out->d_state = get_field(v2, 52, 10);

  // Total payload size
  out->n_payload_bytes =
      out->n_literal_payload_bytes + out->n_lmd_payload_bytes;

  // Freq tables
  // NOTE(review): dst walks past the end of l_freq into the following
  // arrays; this relies on l_freq/m_freq/d_freq/literal_freq being laid out
  // contiguously in lzfse_compressed_block_header_v1 — confirm in header.
  uint16_t *dst = &(out->l_freq[0]);
  const uint8_t *src = &(in->freq[0]);
  const uint8_t *src_end =
      (const uint8_t *)in + get_field(v2, 0, 32); // first byte after header
  uint32_t accum = 0;     // bit accumulator, LSB-first
  int accum_nbits = 0;    // number of valid bits in accum

  // No freq tables?
  if (src_end == src)
    return 0; // OK, freq tables were omitted

  // Decode one variable-width entry per symbol, across all four tables.
  for (int i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS +
                          LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS;
       i++) {
    // Refill accum, one byte at a time, until we reach end of header, or accum
    // is full
    while (src < src_end && accum_nbits + 8 <= 32) {
      accum |= (uint32_t)(*src) << accum_nbits;
      accum_nbits += 8;
      src++;
    }

    // Decode and store value
    int nbits = 0;
    dst[i] = lzfse_decode_v1_freq_value(accum, &nbits);

    // Entry claimed more bits than the header actually contains.
    if (nbits > accum_nbits)
      return -1; // failed

    // Consume nbits bits
    accum >>= nbits;
    accum_nbits -= nbits;
  }

  if (accum_nbits >= 8 || src != src_end)
    return -1; // we need to end up exactly at the end of header, with less than
               // 8 bits in accumulator

  return 0;
}
146 | |
/*! @abstract Copy \p length bytes from \p src to \p dst in 8-byte chunks.
 *  The do-while rounds the copy up to the next multiple of 8 and always
 *  executes at least once, so up to 7 bytes past dst+length may be written
 *  (and a full 8 bytes even when length == 0). Callers must guarantee
 *  that much slack after both buffers; see the remaining_bytes margin in
 *  lzfse_decode_lmd. */
static inline void copy(uint8_t *dst, const uint8_t *src, size_t length) {
  const uint8_t *dst_end = dst + length;
  do {
    copy8(dst, src); // 8-byte chunk copy helper — presumably unaligned-safe
    dst += 8;
    src += 8;
  } while (dst < dst_end);
}
155 | |
/*! @abstract Decode and execute the L,M,D (literal count, match length,
 *  match distance) symbol stream for the current compressed LZFSE block.
 *  This routine is resumable: if the destination buffer fills up in the
 *  middle of a symbol, the partially-executed L,M,D triple and all decoder
 *  state are written back into the block state so a later call can continue
 *  exactly where it left off.
 *  @return LZFSE_STATUS_OK when all symbols in the block have been executed.
 *  @return LZFSE_STATUS_DST_FULL when the destination buffer is full.
 *  @return LZFSE_STATUS_ERROR if the stream is invalid (bad distance,
 *  literal overrun, or input underflow). */
static int lzfse_decode_lmd(lzfse_decoder_state *s) {
  lzfse_compressed_block_decoder_state *bs = &(s->compressed_lzfse_block_state);
  // Load persistent block state into locals for the duration of the loop;
  // it is stored back only if we have to suspend on a full destination.
  fse_state l_state = bs->l_state;
  fse_state m_state = bs->m_state;
  fse_state d_state = bs->d_state;
  fse_in_stream in = bs->lmd_in_stream;
  const uint8_t *src_start = s->src_begin;
  const uint8_t *src = s->src + bs->lmd_in_buf;
  const uint8_t *lit = bs->current_literal;
  uint8_t *dst = s->dst;
  uint32_t symbols = bs->n_matches; // L,M,D triples left to decode
  int32_t L = bs->l_value;
  int32_t M = bs->m_value;
  int32_t D = bs->d_value;

  assert(l_state < LZFSE_ENCODE_L_STATES);
  assert(m_state < LZFSE_ENCODE_M_STATES);
  assert(d_state < LZFSE_ENCODE_D_STATES);

  // Number of bytes remaining in the destination buffer, minus 32 to
  // provide a margin of safety for using overlarge copies on the fast path.
  // This is a signed quantity, and may go negative when we are close to the
  // end of the buffer. That's OK; we're careful about how we handle it
  // in the slow-and-careful match execution path.
  ptrdiff_t remaining_bytes = s->dst_end - dst - 32;

  // If L or M is non-zero, that means that we have already started decoding
  // this block, and that we needed to interrupt decoding to get more space
  // from the caller. There's a pending L, M, D triplet that we weren't
  // able to completely process. Jump ahead to finish executing that symbol
  // before decoding new values.
  if (L || M)
    goto ExecuteMatch;

  while (symbols > 0) {
    int res;
    // Decode the next L, M, D symbol from the input stream.
    res = fse_in_flush(&in, &src, src_start);
    if (res) {
      return LZFSE_STATUS_ERROR;
    }
    L = fse_value_decode(&l_state, bs->l_decoder, &in);
    assert(l_state < LZFSE_ENCODE_L_STATES);
    // Guard: the L literals we are about to consume must lie inside the
    // block's literal buffer (plus its 64-byte slack).
    if ((lit + L) >= (bs->literals + LZFSE_LITERALS_PER_BLOCK + 64)) {
      return LZFSE_STATUS_ERROR;
    }
    res = fse_in_flush2(&in, &src, src_start);
    if (res) {
      return LZFSE_STATUS_ERROR;
    }
    M = fse_value_decode(&m_state, bs->m_decoder, &in);
    assert(m_state < LZFSE_ENCODE_M_STATES);
    res = fse_in_flush2(&in, &src, src_start);
    if (res) {
      return LZFSE_STATUS_ERROR;
    }
    // D == 0 in the stream means "repeat the previous distance".
    int32_t new_d = fse_value_decode(&d_state, bs->d_decoder, &in);
    assert(d_state < LZFSE_ENCODE_D_STATES);
    D = new_d ? new_d : D;
    symbols--;

  ExecuteMatch:
    // Error if D is out of range, so that we avoid passing through
    // uninitialized data or accessing memory out of the destination
    // buffer.
    if ((uint32_t)D > dst + L - s->dst_begin)
      return LZFSE_STATUS_ERROR;

    if (L + M <= remaining_bytes) {
      // If we have plenty of space remaining, we can copy the literal
      // and match with 16- and 32-byte operations, without worrying
      // about writing off the end of the buffer.
      remaining_bytes -= L + M;
      copy(dst, lit, L);
      dst += L;
      lit += L;
      // For the match, we have two paths; a fast copy by 16-bytes if
      // the match distance is large enough to allow it, and a more
      // careful path that applies a permutation to account for the
      // possible overlap between source and destination if the distance
      // is small.
      if (D >= 8 || D >= M)
        copy(dst, dst - D, M);
      else
        for (size_t i = 0; i < M; i++)
          dst[i] = dst[i - D];
      dst += M;
    }

    else {
      // Otherwise, we are very close to the end of the destination
      // buffer, so we cannot use wide copies that slop off the end
      // of the region that we are copying to. First, we restore
      // the true length remaining, rather than the sham value we've
      // been using so far.
      remaining_bytes += 32;
      // Now, we process the literal. Either there's space for it
      // or there isn't; if there is, we copy the whole thing and
      // update all the pointers and lengths to reflect the copy.
      if (L <= remaining_bytes) {
        for (size_t i = 0; i < L; i++)
          dst[i] = lit[i];
        dst += L;
        lit += L;
        remaining_bytes -= L;
        L = 0;
      }
      // There isn't enough space to fit the whole literal. Copy as
      // much of it as we can, update the pointers and the value of
      // L, and report that the destination buffer is full. Note that
      // we always write right up to the end of the destination buffer.
      else {
        for (size_t i = 0; i < remaining_bytes; i++)
          dst[i] = lit[i];
        dst += remaining_bytes;
        lit += remaining_bytes;
        L -= remaining_bytes;
        goto DestinationBufferIsFull;
      }
      // The match goes just like the literal does. We copy as much as
      // we can byte-by-byte, and if we reach the end of the buffer
      // before finishing, we return to the caller indicating that
      // the buffer is full.
      if (M <= remaining_bytes) {
        for (size_t i = 0; i < M; i++)
          dst[i] = dst[i - D];
        dst += M;
        remaining_bytes -= M;
        M = 0;
        (void)M; // no dead store warning
        // We don't need to update M = 0, because there's no partial
        // symbol to continue executing. Either we're at the end of
        // the block, in which case we will never need to resume with
        // this state, or we're going to decode another L, M, D set,
        // which will overwrite M anyway.
        //
        // But we still set M = 0, to maintain the post-condition.
      } else {
        for (size_t i = 0; i < remaining_bytes; i++)
          dst[i] = dst[i - D];
        dst += remaining_bytes;
        M -= remaining_bytes;
      DestinationBufferIsFull:
        // Because we want to be able to resume decoding where we've left
        // off (even in the middle of a literal or match), we need to
        // update all of the block state fields with the current values
        // so that we can resume execution from this point once the
        // caller has given us more space to write into.
        bs->l_value = L;
        bs->m_value = M;
        bs->d_value = D;
        bs->l_state = l_state;
        bs->m_state = m_state;
        bs->d_state = d_state;
        bs->lmd_in_stream = in;
        bs->n_matches = symbols;
        bs->lmd_in_buf = (uint32_t)(src - s->src);
        bs->current_literal = lit;
        s->dst = dst;
        return LZFSE_STATUS_DST_FULL;
      }
      // Restore the "sham" decremented value of remaining_bytes and
      // continue to the next L, M, D triple. We'll just be back in
      // the careful path again, but this only happens at the very end
      // of the buffer, so a little minor inefficiency here is a good
      // tradeoff for simpler code.
      remaining_bytes -= 32;
    }
  }
  // Because we've finished with the whole block, we don't need to update
  // any of the blockstate fields; they will not be used again. We just
  // update the destination pointer in the state object and return.
  s->dst = dst;
  return LZFSE_STATUS_OK;
}
331 | |
332 | int lzfse_decode(lzfse_decoder_state *s) { |
333 | while (1) { |
334 | // Are we inside a block? |
335 | switch (s->block_magic) { |
336 | case LZFSE_NO_BLOCK_MAGIC: { |
337 | // We need at least 4 bytes of magic number to identify next block |
338 | if (s->src + 4 > s->src_end) |
339 | return LZFSE_STATUS_SRC_EMPTY; // SRC truncated |
340 | uint32_t magic = load4(s->src); |
341 | |
342 | if (magic == LZFSE_ENDOFSTREAM_BLOCK_MAGIC) { |
343 | s->src += 4; |
344 | s->end_of_stream = 1; |
345 | return LZFSE_STATUS_OK; // done |
346 | } |
347 | |
348 | if (magic == LZFSE_UNCOMPRESSED_BLOCK_MAGIC) { |
349 | if (s->src + sizeof(uncompressed_block_header) > s->src_end) |
350 | return LZFSE_STATUS_SRC_EMPTY; // SRC truncated |
351 | // Setup state for uncompressed block |
352 | uncompressed_block_decoder_state *bs = &(s->uncompressed_block_state); |
353 | bs->n_raw_bytes = |
354 | load4(s->src + offsetof(uncompressed_block_header, n_raw_bytes)); |
355 | s->src += sizeof(uncompressed_block_header); |
356 | s->block_magic = magic; |
357 | break; |
358 | } |
359 | |
360 | if (magic == LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC) { |
361 | if (s->src + sizeof(lzvn_compressed_block_header) > s->src_end) |
362 | return LZFSE_STATUS_SRC_EMPTY; // SRC truncated |
363 | // Setup state for compressed LZVN block |
364 | lzvn_compressed_block_decoder_state *bs = |
365 | &(s->compressed_lzvn_block_state); |
366 | bs->n_raw_bytes = |
367 | load4(s->src + offsetof(lzvn_compressed_block_header, n_raw_bytes)); |
368 | bs->n_payload_bytes = load4( |
369 | s->src + offsetof(lzvn_compressed_block_header, n_payload_bytes)); |
370 | bs->d_prev = 0; |
371 | s->src += sizeof(lzvn_compressed_block_header); |
372 | s->block_magic = magic; |
373 | break; |
374 | } |
375 | |
376 | if (magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC || |
377 | magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { |
378 | lzfse_compressed_block_header_v1 ; |
379 | size_t = 0; |
380 | |
381 | // Decode compressed headers |
382 | if (magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { |
383 | // Check we have the fixed part of the structure |
384 | if (s->src + offsetof(lzfse_compressed_block_header_v2, freq) > s->src_end) |
385 | return LZFSE_STATUS_SRC_EMPTY; // SRC truncated |
386 | |
387 | // Get size, and check we have the entire structure |
388 | const lzfse_compressed_block_header_v2 * = |
389 | (const lzfse_compressed_block_header_v2 *)s->src; // not aligned, OK |
390 | header_size = lzfse_decode_v2_header_size(header2); |
391 | if (s->src + header_size > s->src_end) |
392 | return LZFSE_STATUS_SRC_EMPTY; // SRC truncated |
393 | int decodeStatus = lzfse_decode_v1(&header1, header2); |
394 | if (decodeStatus != 0) |
395 | return LZFSE_STATUS_ERROR; // failed |
396 | } else { |
397 | if (s->src + sizeof(lzfse_compressed_block_header_v1) > s->src_end) |
398 | return LZFSE_STATUS_SRC_EMPTY; // SRC truncated |
399 | memcpy(&header1, s->src, sizeof(lzfse_compressed_block_header_v1)); |
400 | header_size = sizeof(lzfse_compressed_block_header_v1); |
401 | } |
402 | |
403 | // We require the header + entire encoded block to be present in SRC |
404 | // during the entire block decoding. |
405 | // This can be relaxed somehow, if it becomes a limiting factor, at the |
406 | // price of a more complex state maintenance. |
407 | // For DST, we can't easily require space for the entire decoded block, |
408 | // because it may expand to something very very large. |
409 | if (s->src + header_size + header1.n_literal_payload_bytes + |
410 | header1.n_lmd_payload_bytes > |
411 | s->src_end) |
412 | return LZFSE_STATUS_SRC_EMPTY; // need all encoded block |
413 | |
414 | // Sanity checks |
415 | if (lzfse_check_block_header_v1(&header1) != 0) { |
416 | return LZFSE_STATUS_ERROR; |
417 | } |
418 | |
419 | // Skip header |
420 | s->src += header_size; |
421 | |
422 | // Setup state for compressed V1 block from header |
423 | lzfse_compressed_block_decoder_state *bs = |
424 | &(s->compressed_lzfse_block_state); |
425 | bs->n_lmd_payload_bytes = header1.n_lmd_payload_bytes; |
426 | bs->n_matches = header1.n_matches; |
427 | fse_init_decoder_table(LZFSE_ENCODE_LITERAL_STATES, |
428 | LZFSE_ENCODE_LITERAL_SYMBOLS, |
429 | header1.literal_freq, bs->literal_decoder); |
430 | fse_init_value_decoder_table( |
431 | LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, header1.l_freq, |
432 | l_extra_bits, l_base_value, bs->l_decoder); |
433 | fse_init_value_decoder_table( |
434 | LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, header1.m_freq, |
435 | m_extra_bits, m_base_value, bs->m_decoder); |
436 | fse_init_value_decoder_table( |
437 | LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, header1.d_freq, |
438 | d_extra_bits, d_base_value, bs->d_decoder); |
439 | |
440 | // Decode literals |
441 | { |
442 | fse_in_stream in; |
443 | const uint8_t *buf_start = s->src_begin; |
444 | s->src += header1.n_literal_payload_bytes; // skip literal payload |
445 | const uint8_t *buf = s->src; // read bits backwards from the end |
446 | if (fse_in_init(&in, header1.literal_bits, &buf, buf_start) != 0) |
447 | return LZFSE_STATUS_ERROR; |
448 | |
449 | fse_state state0 = header1.literal_state[0]; |
450 | fse_state state1 = header1.literal_state[1]; |
451 | fse_state state2 = header1.literal_state[2]; |
452 | fse_state state3 = header1.literal_state[3]; |
453 | |
454 | for (uint32_t i = 0; i < header1.n_literals; i += 4) // n_literals is multiple of 4 |
455 | { |
456 | #if FSE_IOSTREAM_64 |
457 | if (fse_in_flush(&in, &buf, buf_start) != 0) |
458 | return LZFSE_STATUS_ERROR; // [57, 64] bits |
459 | bs->literals[i + 0] = |
460 | fse_decode(&state0, bs->literal_decoder, &in); // 10b max |
461 | bs->literals[i + 1] = |
462 | fse_decode(&state1, bs->literal_decoder, &in); // 10b max |
463 | bs->literals[i + 2] = |
464 | fse_decode(&state2, bs->literal_decoder, &in); // 10b max |
465 | bs->literals[i + 3] = |
466 | fse_decode(&state3, bs->literal_decoder, &in); // 10b max |
467 | #else |
468 | if (fse_in_flush(&in, &buf, buf_start) != 0) |
469 | return LZFSE_STATUS_ERROR; // [25, 23] bits |
470 | bs->literals[i + 0] = |
471 | fse_decode(&state0, bs->literal_decoder, &in); // 10b max |
472 | bs->literals[i + 1] = |
473 | fse_decode(&state1, bs->literal_decoder, &in); // 10b max |
474 | if (fse_in_flush(&in, &buf, buf_start) != 0) |
475 | return LZFSE_STATUS_ERROR; // [25, 23] bits |
476 | bs->literals[i + 2] = |
477 | fse_decode(&state2, bs->literal_decoder, &in); // 10b max |
478 | bs->literals[i + 3] = |
479 | fse_decode(&state3, bs->literal_decoder, &in); // 10b max |
480 | #endif |
481 | } |
482 | |
483 | bs->current_literal = bs->literals; |
484 | } // literals |
485 | |
486 | // SRC is not incremented to skip the LMD payload, since we need it |
487 | // during block decode. |
488 | // We will increment SRC at the end of the block only after this point. |
489 | |
490 | // Initialize the L,M,D decode stream, do not start decoding matches |
491 | // yet, and store decoder state |
492 | { |
493 | fse_in_stream in; |
494 | // read bits backwards from the end |
495 | const uint8_t *buf = s->src + header1.n_lmd_payload_bytes; |
496 | if (fse_in_init(&in, header1.lmd_bits, &buf, s->src) != 0) |
497 | return LZFSE_STATUS_ERROR; |
498 | |
499 | bs->l_state = header1.l_state; |
500 | bs->m_state = header1.m_state; |
501 | bs->d_state = header1.d_state; |
502 | bs->lmd_in_buf = (uint32_t)(buf - s->src); |
503 | bs->l_value = bs->m_value = 0; |
504 | // Initialize D to an illegal value so we can't erroneously use |
505 | // an uninitialized "previous" value. |
506 | bs->d_value = -1; |
507 | bs->lmd_in_stream = in; |
508 | } |
509 | |
510 | s->block_magic = magic; |
511 | break; |
512 | } |
513 | |
514 | // Here we have an invalid magic number |
515 | return LZFSE_STATUS_ERROR; |
516 | } // LZFSE_NO_BLOCK_MAGIC |
517 | |
518 | case LZFSE_UNCOMPRESSED_BLOCK_MAGIC: { |
519 | uncompressed_block_decoder_state *bs = &(s->uncompressed_block_state); |
520 | |
521 | // Compute the size (in bytes) of the data that we will actually copy. |
522 | // This size is minimum(bs->n_raw_bytes, space in src, space in dst). |
523 | |
524 | uint32_t copy_size = bs->n_raw_bytes; // bytes left to copy |
525 | if (copy_size == 0) { |
526 | s->block_magic = 0; |
527 | break; |
528 | } // end of block |
529 | |
530 | if (s->src_end <= s->src) |
531 | return LZFSE_STATUS_SRC_EMPTY; // need more SRC data |
532 | const size_t src_space = s->src_end - s->src; |
533 | if (copy_size > src_space) |
534 | copy_size = (uint32_t)src_space; // limit to SRC data (> 0) |
535 | |
536 | if (s->dst_end <= s->dst) |
537 | return LZFSE_STATUS_DST_FULL; // need more DST capacity |
538 | const size_t dst_space = s->dst_end - s->dst; |
539 | if (copy_size > dst_space) |
540 | copy_size = (uint32_t)dst_space; // limit to DST capacity (> 0) |
541 | |
542 | // Now that we know that the copy size is bounded to the source and |
543 | // dest buffers, go ahead and copy the data. |
544 | // We always have copy_size > 0 here |
545 | memcpy(s->dst, s->src, copy_size); |
546 | s->src += copy_size; |
547 | s->dst += copy_size; |
548 | bs->n_raw_bytes -= copy_size; |
549 | |
550 | break; |
551 | } // LZFSE_UNCOMPRESSED_BLOCK_MAGIC |
552 | |
553 | case LZFSE_COMPRESSEDV1_BLOCK_MAGIC: |
554 | case LZFSE_COMPRESSEDV2_BLOCK_MAGIC: { |
555 | lzfse_compressed_block_decoder_state *bs = |
556 | &(s->compressed_lzfse_block_state); |
557 | // Require the entire LMD payload to be in SRC |
558 | if (s->src_end <= s->src || |
559 | bs->n_lmd_payload_bytes > (size_t)(s->src_end - s->src)) |
560 | return LZFSE_STATUS_SRC_EMPTY; |
561 | |
562 | int status = lzfse_decode_lmd(s); |
563 | if (status != LZFSE_STATUS_OK) |
564 | return status; |
565 | |
566 | s->block_magic = LZFSE_NO_BLOCK_MAGIC; |
567 | s->src += bs->n_lmd_payload_bytes; // to next block |
568 | break; |
569 | } // LZFSE_COMPRESSEDV1_BLOCK_MAGIC || LZFSE_COMPRESSEDV2_BLOCK_MAGIC |
570 | |
571 | case LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC: { |
572 | lzvn_compressed_block_decoder_state *bs = |
573 | &(s->compressed_lzvn_block_state); |
574 | if (bs->n_payload_bytes > 0 && s->src_end <= s->src) |
575 | return LZFSE_STATUS_SRC_EMPTY; // need more SRC data |
576 | |
577 | // Init LZVN decoder state |
578 | lzvn_decoder_state dstate; |
579 | memset(&dstate, 0x00, sizeof(dstate)); |
580 | dstate.src = s->src; |
581 | dstate.src_end = s->src_end; |
582 | if (dstate.src_end - s->src > bs->n_payload_bytes) |
583 | dstate.src_end = s->src + bs->n_payload_bytes; // limit to payload bytes |
584 | dstate.dst_begin = s->dst_begin; |
585 | dstate.dst = s->dst; |
586 | dstate.dst_end = s->dst_end; |
587 | if (dstate.dst_end - s->dst > bs->n_raw_bytes) |
588 | dstate.dst_end = s->dst + bs->n_raw_bytes; // limit to raw bytes |
589 | dstate.d_prev = bs->d_prev; |
590 | dstate.end_of_stream = 0; |
591 | |
592 | // Run LZVN decoder |
593 | lzvn_decode(&dstate); |
594 | |
595 | // Update our state |
596 | size_t src_used = dstate.src - s->src; |
597 | size_t dst_used = dstate.dst - s->dst; |
598 | if (src_used > bs->n_payload_bytes || dst_used > bs->n_raw_bytes) |
599 | return LZFSE_STATUS_ERROR; // sanity check |
600 | s->src = dstate.src; |
601 | s->dst = dstate.dst; |
602 | bs->n_payload_bytes -= (uint32_t)src_used; |
603 | bs->n_raw_bytes -= (uint32_t)dst_used; |
604 | bs->d_prev = (uint32_t)dstate.d_prev; |
605 | |
606 | // Test end of block |
607 | if (bs->n_payload_bytes == 0 && bs->n_raw_bytes == 0 && |
608 | dstate.end_of_stream) { |
609 | s->block_magic = 0; |
610 | break; |
611 | } // block done |
612 | |
613 | // Check for invalid state |
614 | if (bs->n_payload_bytes == 0 || bs->n_raw_bytes == 0 || |
615 | dstate.end_of_stream) |
616 | return LZFSE_STATUS_ERROR; |
617 | |
618 | // Here, block is not done and state is valid, so we need more space in dst. |
619 | return LZFSE_STATUS_DST_FULL; |
620 | } |
621 | |
622 | default: |
623 | return LZFSE_STATUS_ERROR; // invalid magic |
624 | |
625 | } // switch magic |
626 | |
627 | } // block loop |
628 | |
629 | return LZFSE_STATUS_OK; |
630 | } |
631 | |