1 | /* |
2 | Copyright (c) 2015-2016, Apple Inc. All rights reserved. |
3 | |
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: |
5 | |
6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. |
7 | |
8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer |
9 | in the documentation and/or other materials provided with the distribution. |
10 | |
11 | 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived |
12 | from this software without specific prior written permission. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
16 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
18 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
19 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
20 | */ |
21 | |
22 | // LZFSE encoder |
23 | |
24 | #include "lzfse_internal.h" |
25 | #include "lzfse_encode_tables.h" |
26 | |
27 | /*! @abstract Get hash in range [0, LZFSE_ENCODE_HASH_VALUES-1] from 4 bytes in X. */ |
28 | static inline uint32_t hashX(uint32_t x) { |
29 | return (x * 2654435761U) >> |
30 | (32 - LZFSE_ENCODE_HASH_BITS); // Knuth multiplicative hash |
31 | } |
32 | |
33 | /*! @abstract Return value with all 0 except nbits<=32 unsigned bits from V |
34 | * at bit offset OFFSET. |
35 | * V is assumed to fit on nbits bits. */ |
36 | static inline uint64_t setField(uint32_t v, int offset, int nbits) { |
37 | assert(offset + nbits < 64 && offset >= 0 && nbits <= 32); |
38 | assert(nbits == 32 || (v < (1U << nbits))); |
39 | return ((uint64_t)v << (uint64_t)offset); |
40 | } |
41 | |
42 | /*! @abstract Encode all fields, except freq, from a |
43 | * lzfse_compressed_block_header_v1 to a lzfse_compressed_block_header_v2. |
44 | * All but the header_size and freq fields of the output are modified. */ |
45 | static inline void |
46 | lzfse_encode_v1_state(lzfse_compressed_block_header_v2 *out, |
47 | const lzfse_compressed_block_header_v1 *in) { |
48 | out->magic = LZFSE_COMPRESSEDV2_BLOCK_MAGIC; |
49 | out->n_raw_bytes = in->n_raw_bytes; |
50 | |
51 | // Literal state |
52 | out->packed_fields[0] = setField(in->n_literals, 0, 20) | |
53 | setField(in->n_literal_payload_bytes, 20, 20) | |
54 | setField(in->n_matches, 40, 20) | |
55 | setField(7 + in->literal_bits, 60, 3); |
56 | out->packed_fields[1] = setField(in->literal_state[0], 0, 10) | |
57 | setField(in->literal_state[1], 10, 10) | |
58 | setField(in->literal_state[2], 20, 10) | |
59 | setField(in->literal_state[3], 30, 10) | |
60 | setField(in->n_lmd_payload_bytes, 40, 20) | |
61 | setField(7 + in->lmd_bits, 60, 3); |
62 | out->packed_fields[2] = out->packed_fields[2] // header_size already stored in v[2] |
63 | | setField(in->l_state, 32, 10) | setField(in->m_state, 42, 10) | |
64 | setField(in->d_state, 52, 10); |
65 | } |
66 | |
67 | /*! @abstract Encode an entry value in a freq table. Return bits, and sets |
68 | * *nbits to the number of bits to serialize. */ |
69 | static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) { |
70 | // Fixed Huffman code, bits are read from LSB. |
71 | // Note that we rely on the position of the first '0' bit providing the number |
72 | // of bits. |
73 | switch (value) { |
74 | case 0: |
75 | *nbits = 2; |
76 | return 0; // 0.0 |
77 | case 1: |
78 | *nbits = 2; |
79 | return 2; // 1.0 |
80 | case 2: |
81 | *nbits = 3; |
82 | return 1; // 0.01 |
83 | case 3: |
84 | *nbits = 3; |
85 | return 5; // 1.01 |
86 | case 4: |
87 | *nbits = 5; |
88 | return 3; // 00.011 |
89 | case 5: |
90 | *nbits = 5; |
91 | return 11; // 01.011 |
92 | case 6: |
93 | *nbits = 5; |
94 | return 19; // 10.011 |
95 | case 7: |
96 | *nbits = 5; |
97 | return 27; // 11.011 |
98 | default: |
99 | break; |
100 | } |
101 | if (value < 24) { |
102 | *nbits = 8; // 4+4 |
103 | return 7 + ((value - 8) << 4); // xxxx.0111 |
104 | } |
105 | // 24..1047 |
106 | *nbits = 14; // 4+10 |
107 | return ((value - 24) << 4) + 15; // xxxxxxxxxx.1111 |
108 | } |
109 | |
110 | /*! @abstract Encode all tables from a lzfse_compressed_block_header_v1 |
111 | * to a lzfse_compressed_block_header_v2. |
112 | * Only the header_size and freq fields of the output are modified. |
113 | * @return Size of the lzfse_compressed_block_header_v2 */ |
114 | static inline size_t |
115 | lzfse_encode_v1_freq_table(lzfse_compressed_block_header_v2 *out, |
116 | const lzfse_compressed_block_header_v1 *in) { |
117 | uint32_t accum = 0; |
118 | int accum_nbits = 0; |
119 | const uint16_t *src = &(in->l_freq[0]); // first value of first table (struct |
120 | // will not be modified, so this code |
121 | // will remain valid) |
122 | uint8_t *dst = &(out->freq[0]); |
123 | for (int i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + |
124 | LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; |
125 | i++) { |
126 | // Encode one value to accum |
127 | int nbits = 0; |
128 | uint32_t bits = lzfse_encode_v1_freq_value(src[i], &nbits); |
129 | assert(bits < (1 << nbits)); |
130 | accum |= bits << accum_nbits; |
131 | accum_nbits += nbits; |
132 | |
133 | // Store bytes from accum to output buffer |
134 | while (accum_nbits >= 8) { |
135 | *dst = (uint8_t)(accum & 0xff); |
136 | accum >>= 8; |
137 | accum_nbits -= 8; |
138 | dst++; |
139 | } |
140 | } |
141 | // Store final byte if needed |
142 | if (accum_nbits > 0) { |
143 | *dst = (uint8_t)(accum & 0xff); |
144 | dst++; |
145 | } |
146 | |
147 | // Return final size of out |
148 | uint32_t = (uint32_t)(dst - (uint8_t *)out); |
149 | out->packed_fields[0] = 0; |
150 | out->packed_fields[1] = 0; |
151 | out->packed_fields[2] = setField(header_size, 0, 32); |
152 | |
153 | return header_size; |
154 | } |
155 | |
156 | // We need to limit forward match length to make sure it won't split into a too |
157 | // large number of LMD. |
158 | // The limit itself is quite large, so it doesn't really impact compression |
159 | // ratio. |
160 | // The matches may still be expanded backwards by a few bytes, so the final |
161 | // length may be greater than this limit, which is OK. |
162 | #define LZFSE_ENCODE_MAX_MATCH_LENGTH (100 * LZFSE_ENCODE_MAX_M_VALUE) |
163 | |
164 | // =============================================================== |
165 | // Encoder back end |
166 | |
167 | /*! @abstract Encode matches stored in STATE into a compressed/uncompressed block. |
168 | * @return LZFSE_STATUS_OK on success. |
169 | * @return LZFSE_STATUS_DST_FULL and restore initial state if output buffer is |
170 | * full. */ |
171 | static int lzfse_encode_matches(lzfse_encoder_state *s) { |
172 | if (s->n_literals == 0 && s->n_matches == 0) |
173 | return LZFSE_STATUS_OK; // nothing to store, OK |
174 | |
175 | uint32_t l_occ[LZFSE_ENCODE_L_SYMBOLS]; |
176 | uint32_t m_occ[LZFSE_ENCODE_M_SYMBOLS]; |
177 | uint32_t d_occ[LZFSE_ENCODE_D_SYMBOLS]; |
178 | uint32_t literal_occ[LZFSE_ENCODE_LITERAL_SYMBOLS]; |
179 | fse_encoder_entry l_encoder[LZFSE_ENCODE_L_SYMBOLS]; |
180 | fse_encoder_entry m_encoder[LZFSE_ENCODE_M_SYMBOLS]; |
181 | fse_encoder_entry d_encoder[LZFSE_ENCODE_D_SYMBOLS]; |
182 | fse_encoder_entry literal_encoder[LZFSE_ENCODE_LITERAL_SYMBOLS]; |
183 | int ok = 1; |
184 | lzfse_compressed_block_header_v1 = {0}; |
185 | lzfse_compressed_block_header_v2 * = 0; |
186 | |
187 | // Keep initial state to be able to restore it if DST full |
188 | uint8_t *dst0 = s->dst; |
189 | uint32_t n_literals0 = s->n_literals; |
190 | |
191 | // Add 0x00 literals until n_literals multiple of 4, since we encode 4 |
192 | // interleaved literal streams. |
193 | while (s->n_literals & 3) { |
194 | uint32_t n = s->n_literals++; |
195 | s->literals[n] = 0; |
196 | } |
197 | |
198 | // Encode previous distance |
199 | uint32_t d_prev = 0; |
200 | for (uint32_t i = 0; i < s->n_matches; i++) { |
201 | uint32_t d = s->d_values[i]; |
202 | if (d == d_prev) |
203 | s->d_values[i] = 0; |
204 | else |
205 | d_prev = d; |
206 | } |
207 | |
208 | // Clear occurrence tables |
209 | memset(l_occ, 0, sizeof(l_occ)); |
210 | memset(m_occ, 0, sizeof(m_occ)); |
211 | memset(d_occ, 0, sizeof(d_occ)); |
212 | memset(literal_occ, 0, sizeof(literal_occ)); |
213 | |
214 | // Update occurrence tables in all 4 streams (L,M,D,literals) |
215 | uint32_t l_sum = 0; |
216 | uint32_t m_sum = 0; |
217 | for (uint32_t i = 0; i < s->n_matches; i++) { |
218 | uint32_t l = s->l_values[i]; |
219 | l_sum += l; |
220 | l_occ[l_base_from_value(l)]++; |
221 | } |
222 | for (uint32_t i = 0; i < s->n_matches; i++) { |
223 | uint32_t m = s->m_values[i]; |
224 | m_sum += m; |
225 | m_occ[m_base_from_value(m)]++; |
226 | } |
227 | for (uint32_t i = 0; i < s->n_matches; i++) |
228 | d_occ[d_base_from_value(s->d_values[i])]++; |
229 | for (uint32_t i = 0; i < s->n_literals; i++) |
230 | literal_occ[s->literals[i]]++; |
231 | |
232 | // Make sure we have enough room for a _full_ V2 header |
233 | if (s->dst + sizeof(lzfse_compressed_block_header_v2) > s->dst_end) { |
234 | ok = 0; |
235 | goto END; |
236 | } |
237 | header2 = (lzfse_compressed_block_header_v2 *)(s->dst); |
238 | |
239 | // Setup header V1 |
240 | header1.magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; |
241 | header1.n_raw_bytes = m_sum + l_sum; |
242 | header1.n_matches = s->n_matches; |
243 | header1.n_literals = s->n_literals; |
244 | |
245 | // Normalize occurrence tables to freq tables |
246 | fse_normalize_freq(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, l_occ, |
247 | header1.l_freq); |
248 | fse_normalize_freq(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, m_occ, |
249 | header1.m_freq); |
250 | fse_normalize_freq(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, d_occ, |
251 | header1.d_freq); |
252 | fse_normalize_freq(LZFSE_ENCODE_LITERAL_STATES, LZFSE_ENCODE_LITERAL_SYMBOLS, |
253 | literal_occ, header1.literal_freq); |
254 | |
255 | // Compress freq tables to V2 header, and get actual size of V2 header |
256 | s->dst += lzfse_encode_v1_freq_table(header2, &header1); |
257 | |
258 | // Initialize encoder tables from freq tables |
259 | fse_init_encoder_table(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, |
260 | header1.l_freq, l_encoder); |
261 | fse_init_encoder_table(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, |
262 | header1.m_freq, m_encoder); |
263 | fse_init_encoder_table(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, |
264 | header1.d_freq, d_encoder); |
265 | fse_init_encoder_table(LZFSE_ENCODE_LITERAL_STATES, |
266 | LZFSE_ENCODE_LITERAL_SYMBOLS, header1.literal_freq, |
267 | literal_encoder); |
268 | |
269 | // Encode literals |
270 | { |
271 | fse_out_stream out; |
272 | fse_out_init(&out); |
273 | fse_state state0, state1, state2, state3; |
274 | state0 = state1 = state2 = state3 = 0; |
275 | |
276 | uint8_t *buf = s->dst; |
277 | uint32_t i = s->n_literals; // I multiple of 4 |
278 | // We encode starting from the last literal so we can decode starting from |
279 | // the first |
280 | while (i > 0) { |
281 | if (buf + 16 > s->dst_end) { |
282 | ok = 0; |
283 | goto END; |
284 | } // out full |
285 | i -= 4; |
286 | fse_encode(&state3, literal_encoder, &out, s->literals[i + 3]); // 10b |
287 | fse_encode(&state2, literal_encoder, &out, s->literals[i + 2]); // 10b |
288 | #if !FSE_IOSTREAM_64 |
289 | fse_out_flush(&out, &buf); |
290 | #endif |
291 | fse_encode(&state1, literal_encoder, &out, s->literals[i + 1]); // 10b |
292 | fse_encode(&state0, literal_encoder, &out, s->literals[i + 0]); // 10b |
293 | fse_out_flush(&out, &buf); |
294 | } |
295 | fse_out_finish(&out, &buf); |
296 | |
297 | // Update header with final encoder state |
298 | header1.literal_bits = out.accum_nbits; // [-7, 0] |
299 | header1.n_literal_payload_bytes = (uint32_t)(buf - s->dst); |
300 | header1.literal_state[0] = state0; |
301 | header1.literal_state[1] = state1; |
302 | header1.literal_state[2] = state2; |
303 | header1.literal_state[3] = state3; |
304 | |
305 | // Update state |
306 | s->dst = buf; |
307 | |
308 | } // literals |
309 | |
310 | // Encode L,M,D |
311 | { |
312 | fse_out_stream out; |
313 | fse_out_init(&out); |
314 | fse_state l_state, m_state, d_state; |
315 | l_state = m_state = d_state = 0; |
316 | |
317 | uint8_t *buf = s->dst; |
318 | uint32_t i = s->n_matches; |
319 | |
320 | // Add 8 padding bytes to the L,M,D payload |
321 | if (buf + 8 > s->dst_end) { |
322 | ok = 0; |
323 | goto END; |
324 | } // out full |
325 | store8(buf, 0); |
326 | buf += 8; |
327 | |
328 | // We encode starting from the last match so we can decode starting from the |
329 | // first |
330 | while (i > 0) { |
331 | if (buf + 16 > s->dst_end) { |
332 | ok = 0; |
333 | goto END; |
334 | } // out full |
335 | i -= 1; |
336 | |
337 | // D requires 23b max |
338 | int32_t d_value = s->d_values[i]; |
339 | uint8_t d_symbol = d_base_from_value(d_value); |
340 | int32_t d_nbits = d_extra_bits[d_symbol]; |
341 | int32_t d_bits = d_value - d_base_value[d_symbol]; |
342 | fse_out_push(&out, d_nbits, d_bits); |
343 | fse_encode(&d_state, d_encoder, &out, d_symbol); |
344 | #if !FSE_IOSTREAM_64 |
345 | fse_out_flush(&out, &buf); |
346 | #endif |
347 | |
348 | // M requires 17b max |
349 | int32_t m_value = s->m_values[i]; |
350 | uint8_t m_symbol = m_base_from_value(m_value); |
351 | int32_t m_nbits = m_extra_bits[m_symbol]; |
352 | int32_t m_bits = m_value - m_base_value[m_symbol]; |
353 | fse_out_push(&out, m_nbits, m_bits); |
354 | fse_encode(&m_state, m_encoder, &out, m_symbol); |
355 | #if !FSE_IOSTREAM_64 |
356 | fse_out_flush(&out, &buf); |
357 | #endif |
358 | |
359 | // L requires 14b max |
360 | int32_t l_value = s->l_values[i]; |
361 | uint8_t l_symbol = l_base_from_value(l_value); |
362 | int32_t l_nbits = l_extra_bits[l_symbol]; |
363 | int32_t l_bits = l_value - l_base_value[l_symbol]; |
364 | fse_out_push(&out, l_nbits, l_bits); |
365 | fse_encode(&l_state, l_encoder, &out, l_symbol); |
366 | fse_out_flush(&out, &buf); |
367 | } |
368 | fse_out_finish(&out, &buf); |
369 | |
370 | // Update header with final encoder state |
371 | header1.n_lmd_payload_bytes = (uint32_t)(buf - s->dst); |
372 | header1.lmd_bits = out.accum_nbits; // [-7, 0] |
373 | header1.l_state = l_state; |
374 | header1.m_state = m_state; |
375 | header1.d_state = d_state; |
376 | |
377 | // Update state |
378 | s->dst = buf; |
379 | |
380 | } // L,M,D |
381 | |
382 | // Final state update, here we had enough space in DST, and are not going to |
383 | // revert state |
384 | s->n_literals = 0; |
385 | s->n_matches = 0; |
386 | |
387 | // Final payload size |
388 | header1.n_payload_bytes = |
389 | header1.n_literal_payload_bytes + header1.n_lmd_payload_bytes; |
390 | |
391 | // Encode state info in V2 header (we previously encoded the tables, now we |
392 | // set the other fields) |
393 | lzfse_encode_v1_state(header2, &header1); |
394 | |
395 | END: |
396 | if (!ok) { |
397 | // Revert state, DST was full |
398 | |
399 | // Revert the d_prev encoding |
400 | uint32_t d_prev = 0; |
401 | for (uint32_t i = 0; i < s->n_matches; i++) { |
402 | uint32_t d = s->d_values[i]; |
403 | if (d == 0) |
404 | s->d_values[i] = d_prev; |
405 | else |
406 | d_prev = d; |
407 | } |
408 | |
409 | // Revert literal count |
410 | s->n_literals = n_literals0; |
411 | |
412 | // Revert DST |
413 | s->dst = dst0; |
414 | |
415 | return LZFSE_STATUS_DST_FULL; // DST full |
416 | } |
417 | |
418 | return LZFSE_STATUS_OK; |
419 | } |
420 | |
421 | /*! @abstract Push a L,M,D match into the STATE. |
422 | * @return LZFSE_STATUS_OK if OK. |
423 | * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of |
424 | * the buffers is full. In that case the state is not modified. */ |
425 | static inline int lzfse_push_lmd(lzfse_encoder_state *s, uint32_t L, |
426 | uint32_t M, uint32_t D) { |
427 | // Check if we have enough space to push the match (we add some margin to copy |
428 | // literals faster here, and round final count later) |
429 | if (s->n_matches + 1 + 8 > LZFSE_MATCHES_PER_BLOCK) |
430 | return LZFSE_STATUS_DST_FULL; // state full |
431 | if (s->n_literals + L + 16 > LZFSE_LITERALS_PER_BLOCK) |
432 | return LZFSE_STATUS_DST_FULL; // state full |
433 | |
434 | // Store match |
435 | uint32_t n = s->n_matches++; |
436 | s->l_values[n] = L; |
437 | s->m_values[n] = M; |
438 | s->d_values[n] = D; |
439 | |
440 | // Store literals |
441 | uint8_t *dst = s->literals + s->n_literals; |
442 | const uint8_t *src = s->src + s->src_literal; |
443 | uint8_t *dst_end = dst + L; |
444 | if (s->src_literal + L + 16 > s->src_end) { |
445 | // Careful at the end of SRC, we can't read 16 bytes |
446 | if (L > 0) |
447 | memcpy(dst, src, L); |
448 | } else { |
449 | copy16(dst, src); |
450 | dst += 16; |
451 | src += 16; |
452 | while (dst < dst_end) { |
453 | copy16(dst, src); |
454 | dst += 16; |
455 | src += 16; |
456 | } |
457 | } |
458 | s->n_literals += L; |
459 | |
460 | // Update state |
461 | s->src_literal += L + M; |
462 | |
463 | return LZFSE_STATUS_OK; |
464 | } |
465 | |
466 | /*! @abstract Split MATCH into one or more L,M,D parts, and push to STATE. |
467 | * @return LZFSE_STATUS_OK if OK. |
468 | * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of the |
469 | * buffers is full. In that case the state is not modified. */ |
470 | static int lzfse_push_match(lzfse_encoder_state *s, const lzfse_match *match) { |
471 | // Save the initial n_matches, n_literals, src_literal |
472 | uint32_t n_matches0 = s->n_matches; |
473 | uint32_t n_literals0 = s->n_literals; |
474 | lzfse_offset src_literals0 = s->src_literal; |
475 | |
476 | // L,M,D |
477 | uint32_t L = (uint32_t)(match->pos - s->src_literal); // literal count |
478 | uint32_t M = match->length; // match length |
479 | uint32_t D = (uint32_t)(match->pos - match->ref); // match distance |
480 | int ok = 1; |
481 | |
482 | // Split L if too large |
483 | while (L > LZFSE_ENCODE_MAX_L_VALUE) { |
484 | if (lzfse_push_lmd(s, LZFSE_ENCODE_MAX_L_VALUE, 0, 1) != 0) { |
485 | ok = 0; |
486 | goto END; |
487 | } // take D=1 because most frequent, but not actually used |
488 | L -= LZFSE_ENCODE_MAX_L_VALUE; |
489 | } |
490 | |
491 | // Split if M too large |
492 | while (M > LZFSE_ENCODE_MAX_M_VALUE) { |
493 | if (lzfse_push_lmd(s, L, LZFSE_ENCODE_MAX_M_VALUE, D) != 0) { |
494 | ok = 0; |
495 | goto END; |
496 | } |
497 | L = 0; |
498 | M -= LZFSE_ENCODE_MAX_M_VALUE; |
499 | } |
500 | |
501 | // L,M in range |
502 | if (L > 0 || M > 0) { |
503 | if (lzfse_push_lmd(s, L, M, D) != 0) { |
504 | ok = 0; |
505 | goto END; |
506 | } |
507 | L = M = 0; |
508 | (void)L; |
509 | (void)M; // dead stores |
510 | } |
511 | |
512 | END: |
513 | if (!ok) { |
514 | // Revert state |
515 | s->n_matches = n_matches0; |
516 | s->n_literals = n_literals0; |
517 | s->src_literal = src_literals0; |
518 | |
519 | return LZFSE_STATUS_DST_FULL; // state tables full |
520 | } |
521 | |
522 | return LZFSE_STATUS_OK; // OK |
523 | } |
524 | |
525 | /*! @abstract Backend: add MATCH to state S. Encode block if necessary, when |
526 | * state is full. |
527 | * @return LZFSE_STATUS_OK if OK. |
528 | * @return LZFSE_STATUS_DST_FULL if the match can't be added, meaning one of the |
529 | * buffers is full. In that case the state is not modified. */ |
530 | static int lzfse_backend_match(lzfse_encoder_state *s, |
531 | const lzfse_match *match) { |
532 | // Try to push the match in state |
533 | if (lzfse_push_match(s, match) == LZFSE_STATUS_OK) |
534 | return LZFSE_STATUS_OK; // OK, match added to state |
535 | |
536 | // Here state tables are full, try to emit block |
537 | if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) |
538 | return LZFSE_STATUS_DST_FULL; // DST full, match not added |
539 | |
540 | // Here block has been emitted, re-try to push the match in state |
541 | return lzfse_push_match(s, match); |
542 | } |
543 | |
544 | /*! @abstract Backend: add L literals to state S. Encode block if necessary, |
545 | * when state is full. |
546 | * @return LZFSE_STATUS_OK if OK. |
547 | * @return LZFSE_STATUS_DST_FULL if the literals can't be added, meaning one of |
548 | * the buffers is full. In that case the state is not modified. */ |
549 | static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) { |
550 | // Create a fake match with M=0, D=1 |
551 | lzfse_match match; |
552 | lzfse_offset pos = s->src_literal + L; |
553 | match.pos = pos; |
554 | match.ref = match.pos - 1; |
555 | match.length = 0; |
556 | return lzfse_backend_match(s, &match); |
557 | } |
558 | |
559 | /*! @abstract Backend: flush final block, and emit end of stream |
560 | * @return LZFSE_STATUS_OK if OK. |
561 | * @return LZFSE_STATUS_DST_FULL if either the final block, or the end-of-stream |
562 | * can't be added, meaning one of the buffers is full. If the block was emitted, |
563 | * the state is updated to reflect this. Otherwise, it is left unchanged. */ |
564 | static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) { |
565 | // Final match triggers write, otherwise emit blocks when we have enough |
566 | // matches stored |
567 | if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) |
568 | return LZFSE_STATUS_DST_FULL; // DST full |
569 | |
570 | // Emit end-of-stream block |
571 | if (s->dst + 4 > s->dst_end) |
572 | return LZFSE_STATUS_DST_FULL; // DST full |
573 | store4(s->dst, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); |
574 | s->dst += 4; |
575 | |
576 | return LZFSE_STATUS_OK; // OK |
577 | } |
578 | |
579 | // =============================================================== |
580 | // Encoder state management |
581 | |
582 | /*! @abstract Initialize state: |
583 | * @code |
584 | * - hash table with all invalid pos, and value 0. |
585 | * - pending match to NO_MATCH. |
586 | * - src_literal to 0. |
587 | * - d_prev to 0. |
588 | @endcode |
589 | * @return LZFSE_STATUS_OK */ |
590 | int lzfse_encode_init(lzfse_encoder_state *s) { |
591 | const lzfse_match NO_MATCH = {0}; |
592 | lzfse_history_set line; |
593 | for (int i = 0; i < LZFSE_ENCODE_HASH_WIDTH; i++) { |
594 | line.pos[i] = -4 * LZFSE_ENCODE_MAX_D_VALUE; // invalid pos |
595 | line.value[i] = 0; |
596 | } |
597 | // Fill table |
598 | for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) |
599 | s->history_table[i] = line; |
600 | s->pending = NO_MATCH; |
601 | s->src_literal = 0; |
602 | |
603 | return LZFSE_STATUS_OK; // OK |
604 | } |
605 | |
606 | /*! @abstract Translate state \p src forward by \p delta > 0. |
607 | * Offsets in \p src are updated backwards to point to the same positions. |
608 | * @return LZFSE_STATUS_OK */ |
609 | int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta) { |
610 | assert(delta >= 0); |
611 | if (delta == 0) |
612 | return LZFSE_STATUS_OK; // OK |
613 | |
614 | // SRC |
615 | s->src += delta; |
616 | |
617 | // Offsets in SRC |
618 | s->src_end -= delta; |
619 | s->src_encode_i -= delta; |
620 | s->src_encode_end -= delta; |
621 | s->src_literal -= delta; |
622 | |
623 | // Pending match |
624 | s->pending.pos -= delta; |
625 | s->pending.ref -= delta; |
626 | |
627 | // history_table positions, translated, and clamped to invalid pos |
628 | int32_t invalidPos = -4 * LZFSE_ENCODE_MAX_D_VALUE; |
629 | for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) { |
630 | int32_t *p = &(s->history_table[i].pos[0]); |
631 | for (int j = 0; j < LZFSE_ENCODE_HASH_WIDTH; j++) { |
632 | lzfse_offset newPos = p[j] - delta; // translate |
633 | p[j] = (int32_t)((newPos < invalidPos) ? invalidPos : newPos); // clamp |
634 | } |
635 | } |
636 | |
637 | return LZFSE_STATUS_OK; // OK |
638 | } |
639 | |
640 | // =============================================================== |
641 | // Encoder front end |
642 | |
643 | int lzfse_encode_base(lzfse_encoder_state *s) { |
644 | lzfse_history_set *history_table = s->history_table; |
645 | lzfse_history_set *hashLine = 0; |
646 | lzfse_history_set newH; |
647 | const lzfse_match NO_MATCH = {0}; |
648 | int ok = 1; |
649 | |
650 | memset(&newH, 0x00, sizeof(newH)); |
651 | |
652 | // 8 byte padding at end of buffer |
653 | s->src_encode_end = s->src_end - 8; |
654 | for (; s->src_encode_i < s->src_encode_end; s->src_encode_i++) { |
655 | lzfse_offset pos = s->src_encode_i; // pos >= 0 |
656 | |
657 | // Load 4 byte value and get hash line |
658 | uint32_t x = load4(s->src + pos); |
659 | hashLine = history_table + hashX(x); |
660 | lzfse_history_set h = *hashLine; |
661 | |
662 | // Prepare next hash line (component 0 is the most recent) to prepare new |
663 | // entries (stored later) |
664 | { |
665 | newH.pos[0] = (int32_t)pos; |
666 | for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) |
667 | newH.pos[k + 1] = h.pos[k]; |
668 | newH.value[0] = x; |
669 | for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) |
670 | newH.value[k + 1] = h.value[k]; |
671 | } |
672 | |
673 | // Do not look for a match if we are still covered by a previous match |
674 | if (pos < s->src_literal) |
675 | goto END_POS; |
676 | |
677 | // Search best incoming match |
678 | lzfse_match incoming = {.pos = pos, .ref = 0, .length = 0}; |
679 | |
680 | // Check for matches. We consider matches of length >= 4 only. |
681 | for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH; k++) { |
682 | uint32_t d = h.value[k] ^ x; |
683 | if (d) |
684 | continue; // no 4 byte match |
685 | int32_t ref = h.pos[k]; |
686 | if (ref + LZFSE_ENCODE_MAX_D_VALUE < pos) |
687 | continue; // too far |
688 | |
689 | const uint8_t *src_ref = s->src + ref; |
690 | const uint8_t *src_pos = s->src + pos; |
691 | uint32_t length = 4; |
692 | uint32_t maxLength = |
693 | (uint32_t)(s->src_end - pos - 8); // ensure we don't hit the end of SRC |
694 | while (length < maxLength) { |
695 | uint64_t d = load8(src_ref + length) ^ load8(src_pos + length); |
696 | if (d == 0) { |
697 | length += 8; |
698 | continue; |
699 | } |
700 | |
701 | length += |
702 | (__builtin_ctzll(d) >> 3); // ctzll must be called only with D != 0 |
703 | break; |
704 | } |
705 | if (length > incoming.length) { |
706 | incoming.length = length; |
707 | incoming.ref = ref; |
708 | } // keep if longer |
709 | } |
710 | |
711 | // No incoming match? |
712 | if (incoming.length == 0) { |
713 | // We may still want to emit some literals here, to not lag too far behind |
714 | // the current search point, and avoid |
715 | // ending up with a literal block not fitting in the state. |
716 | lzfse_offset n_literals = pos - s->src_literal; |
717 | // The threshold here should be larger than a couple of MAX_L_VALUE, and |
718 | // much smaller than LITERALS_PER_BLOCK |
719 | if (n_literals > 8 * LZFSE_ENCODE_MAX_L_VALUE) { |
720 | // Here, we need to consume some literals. Emit pending match if there |
721 | // is one |
722 | if (s->pending.length > 0) { |
723 | if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { |
724 | ok = 0; |
725 | goto END; |
726 | } |
727 | s->pending = NO_MATCH; |
728 | } else { |
729 | // No pending match, emit a full LZFSE_ENCODE_MAX_L_VALUE block of |
730 | // literals |
731 | if (lzfse_backend_literals(s, LZFSE_ENCODE_MAX_L_VALUE) != |
732 | LZFSE_STATUS_OK) { |
733 | ok = 0; |
734 | goto END; |
735 | } |
736 | } |
737 | } |
738 | goto END_POS; // no incoming match |
739 | } |
740 | |
741 | // Limit match length (it may still be expanded backwards, but this is |
742 | // bounded by the limit on literals we tested before) |
743 | if (incoming.length > LZFSE_ENCODE_MAX_MATCH_LENGTH) { |
744 | incoming.length = LZFSE_ENCODE_MAX_MATCH_LENGTH; |
745 | } |
746 | |
747 | // Expand backwards (since this is expensive, we do this for the best match |
748 | // only) |
749 | while (incoming.pos > s->src_literal && incoming.ref > 0 && |
750 | s->src[incoming.ref - 1] == s->src[incoming.pos - 1]) { |
751 | incoming.pos--; |
752 | incoming.ref--; |
753 | } |
754 | incoming.length += pos - incoming.pos; // update length after expansion |
755 | |
756 | // Match filtering heuristic (from LZVN). INCOMING is always defined here. |
757 | |
758 | // Incoming is 'good', emit incoming |
759 | if (incoming.length >= LZFSE_ENCODE_GOOD_MATCH) { |
760 | if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) { |
761 | ok = 0; |
762 | goto END; |
763 | } |
764 | s->pending = NO_MATCH; |
765 | goto END_POS; |
766 | } |
767 | |
768 | // No pending, keep incoming |
769 | if (s->pending.length == 0) { |
770 | s->pending = incoming; |
771 | goto END_POS; |
772 | } |
773 | |
774 | // No overlap, emit pending, keep incoming |
775 | if (s->pending.pos + s->pending.length <= incoming.pos) { |
776 | if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { |
777 | ok = 0; |
778 | goto END; |
779 | } |
780 | s->pending = incoming; |
781 | goto END_POS; |
782 | } |
783 | |
784 | // Overlap: emit longest |
785 | if (incoming.length > s->pending.length) { |
786 | if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) { |
787 | ok = 0; |
788 | goto END; |
789 | } |
790 | } else { |
791 | if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { |
792 | ok = 0; |
793 | goto END; |
794 | } |
795 | } |
796 | s->pending = NO_MATCH; |
797 | |
798 | END_POS: |
799 | // We are done with this src_encode_i. |
800 | // Update state now (s->pending has already been updated). |
801 | *hashLine = newH; |
802 | } |
803 | |
804 | END: |
805 | return ok ? LZFSE_STATUS_OK : LZFSE_STATUS_DST_FULL; |
806 | } |
807 | |
808 | int lzfse_encode_finish(lzfse_encoder_state *s) { |
809 | const lzfse_match NO_MATCH = {0}; |
810 | |
811 | // Emit pending match |
812 | if (s->pending.length > 0) { |
813 | if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) |
814 | return LZFSE_STATUS_DST_FULL; |
815 | s->pending = NO_MATCH; |
816 | } |
817 | |
818 | // Emit final literals if any |
819 | lzfse_offset L = s->src_end - s->src_literal; |
820 | if (L > 0) { |
821 | if (lzfse_backend_literals(s, L) != LZFSE_STATUS_OK) |
822 | return LZFSE_STATUS_DST_FULL; |
823 | } |
824 | |
825 | // Emit all matches, and end-of-stream block |
826 | if (lzfse_backend_end_of_stream(s) != LZFSE_STATUS_OK) |
827 | return LZFSE_STATUS_DST_FULL; |
828 | |
829 | return LZFSE_STATUS_OK; |
830 | } |
831 | |