1/*
2Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
61. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
82. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9 in the documentation and/or other materials provided with the distribution.
10
113. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12 from this software without specific prior written permission.
13
14THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20*/
21
22// LZFSE encoder
23
24#include "lzfse_internal.h"
25#include "lzfse_encode_tables.h"
26
27/*! @abstract Get hash in range [0, LZFSE_ENCODE_HASH_VALUES-1] from 4 bytes in X. */
28static inline uint32_t hashX(uint32_t x) {
29 return (x * 2654435761U) >>
30 (32 - LZFSE_ENCODE_HASH_BITS); // Knuth multiplicative hash
31}
32
33/*! @abstract Return value with all 0 except nbits<=32 unsigned bits from V
34 * at bit offset OFFSET.
35 * V is assumed to fit on nbits bits. */
36static inline uint64_t setField(uint32_t v, int offset, int nbits) {
37 assert(offset + nbits < 64 && offset >= 0 && nbits <= 32);
38 assert(nbits == 32 || (v < (1U << nbits)));
39 return ((uint64_t)v << (uint64_t)offset);
40}
41
42/*! @abstract Encode all fields, except freq, from a
43 * lzfse_compressed_block_header_v1 to a lzfse_compressed_block_header_v2.
44 * All but the header_size and freq fields of the output are modified. */
45static inline void
46lzfse_encode_v1_state(lzfse_compressed_block_header_v2 *out,
47 const lzfse_compressed_block_header_v1 *in) {
48 out->magic = LZFSE_COMPRESSEDV2_BLOCK_MAGIC;
49 out->n_raw_bytes = in->n_raw_bytes;
50
51 // Literal state
52 out->packed_fields[0] = setField(in->n_literals, 0, 20) |
53 setField(in->n_literal_payload_bytes, 20, 20) |
54 setField(in->n_matches, 40, 20) |
55 setField(7 + in->literal_bits, 60, 3);
56 out->packed_fields[1] = setField(in->literal_state[0], 0, 10) |
57 setField(in->literal_state[1], 10, 10) |
58 setField(in->literal_state[2], 20, 10) |
59 setField(in->literal_state[3], 30, 10) |
60 setField(in->n_lmd_payload_bytes, 40, 20) |
61 setField(7 + in->lmd_bits, 60, 3);
62 out->packed_fields[2] = out->packed_fields[2] // header_size already stored in v[2]
63 | setField(in->l_state, 32, 10) | setField(in->m_state, 42, 10) |
64 setField(in->d_state, 52, 10);
65}
66
67/*! @abstract Encode an entry value in a freq table. Return bits, and sets
68 * *nbits to the number of bits to serialize. */
69static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) {
70 // Fixed Huffman code, bits are read from LSB.
71 // Note that we rely on the position of the first '0' bit providing the number
72 // of bits.
73 switch (value) {
74 case 0:
75 *nbits = 2;
76 return 0; // 0.0
77 case 1:
78 *nbits = 2;
79 return 2; // 1.0
80 case 2:
81 *nbits = 3;
82 return 1; // 0.01
83 case 3:
84 *nbits = 3;
85 return 5; // 1.01
86 case 4:
87 *nbits = 5;
88 return 3; // 00.011
89 case 5:
90 *nbits = 5;
91 return 11; // 01.011
92 case 6:
93 *nbits = 5;
94 return 19; // 10.011
95 case 7:
96 *nbits = 5;
97 return 27; // 11.011
98 default:
99 break;
100 }
101 if (value < 24) {
102 *nbits = 8; // 4+4
103 return 7 + ((value - 8) << 4); // xxxx.0111
104 }
105 // 24..1047
106 *nbits = 14; // 4+10
107 return ((value - 24) << 4) + 15; // xxxxxxxxxx.1111
108}
109
110/*! @abstract Encode all tables from a lzfse_compressed_block_header_v1
111 * to a lzfse_compressed_block_header_v2.
112 * Only the header_size and freq fields of the output are modified.
113 * @return Size of the lzfse_compressed_block_header_v2 */
114static inline size_t
115lzfse_encode_v1_freq_table(lzfse_compressed_block_header_v2 *out,
116 const lzfse_compressed_block_header_v1 *in) {
117 uint32_t accum = 0;
118 int accum_nbits = 0;
119 const uint16_t *src = &(in->l_freq[0]); // first value of first table (struct
120 // will not be modified, so this code
121 // will remain valid)
122 uint8_t *dst = &(out->freq[0]);
123 for (int i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS +
124 LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS;
125 i++) {
126 // Encode one value to accum
127 int nbits = 0;
128 uint32_t bits = lzfse_encode_v1_freq_value(src[i], &nbits);
129 assert(bits < (1 << nbits));
130 accum |= bits << accum_nbits;
131 accum_nbits += nbits;
132
133 // Store bytes from accum to output buffer
134 while (accum_nbits >= 8) {
135 *dst = (uint8_t)(accum & 0xff);
136 accum >>= 8;
137 accum_nbits -= 8;
138 dst++;
139 }
140 }
141 // Store final byte if needed
142 if (accum_nbits > 0) {
143 *dst = (uint8_t)(accum & 0xff);
144 dst++;
145 }
146
147 // Return final size of out
148 uint32_t header_size = (uint32_t)(dst - (uint8_t *)out);
149 out->packed_fields[0] = 0;
150 out->packed_fields[1] = 0;
151 out->packed_fields[2] = setField(header_size, 0, 32);
152
153 return header_size;
154}
155
156// We need to limit forward match length to make sure it won't split into a too
157// large number of LMD.
158// The limit itself is quite large, so it doesn't really impact compression
159// ratio.
160// The matches may still be expanded backwards by a few bytes, so the final
161// length may be greater than this limit, which is OK.
162#define LZFSE_ENCODE_MAX_MATCH_LENGTH (100 * LZFSE_ENCODE_MAX_M_VALUE)
163
164// ===============================================================
165// Encoder back end
166
167/*! @abstract Encode matches stored in STATE into a compressed/uncompressed block.
168 * @return LZFSE_STATUS_OK on success.
169 * @return LZFSE_STATUS_DST_FULL and restore initial state if output buffer is
170 * full. */
171static int lzfse_encode_matches(lzfse_encoder_state *s) {
172 if (s->n_literals == 0 && s->n_matches == 0)
173 return LZFSE_STATUS_OK; // nothing to store, OK
174
175 uint32_t l_occ[LZFSE_ENCODE_L_SYMBOLS];
176 uint32_t m_occ[LZFSE_ENCODE_M_SYMBOLS];
177 uint32_t d_occ[LZFSE_ENCODE_D_SYMBOLS];
178 uint32_t literal_occ[LZFSE_ENCODE_LITERAL_SYMBOLS];
179 fse_encoder_entry l_encoder[LZFSE_ENCODE_L_SYMBOLS];
180 fse_encoder_entry m_encoder[LZFSE_ENCODE_M_SYMBOLS];
181 fse_encoder_entry d_encoder[LZFSE_ENCODE_D_SYMBOLS];
182 fse_encoder_entry literal_encoder[LZFSE_ENCODE_LITERAL_SYMBOLS];
183 int ok = 1;
184 lzfse_compressed_block_header_v1 header1 = {0};
185 lzfse_compressed_block_header_v2 *header2 = 0;
186
187 // Keep initial state to be able to restore it if DST full
188 uint8_t *dst0 = s->dst;
189 uint32_t n_literals0 = s->n_literals;
190
191 // Add 0x00 literals until n_literals multiple of 4, since we encode 4
192 // interleaved literal streams.
193 while (s->n_literals & 3) {
194 uint32_t n = s->n_literals++;
195 s->literals[n] = 0;
196 }
197
198 // Encode previous distance
199 uint32_t d_prev = 0;
200 for (uint32_t i = 0; i < s->n_matches; i++) {
201 uint32_t d = s->d_values[i];
202 if (d == d_prev)
203 s->d_values[i] = 0;
204 else
205 d_prev = d;
206 }
207
208 // Clear occurrence tables
209 memset(l_occ, 0, sizeof(l_occ));
210 memset(m_occ, 0, sizeof(m_occ));
211 memset(d_occ, 0, sizeof(d_occ));
212 memset(literal_occ, 0, sizeof(literal_occ));
213
214 // Update occurrence tables in all 4 streams (L,M,D,literals)
215 uint32_t l_sum = 0;
216 uint32_t m_sum = 0;
217 for (uint32_t i = 0; i < s->n_matches; i++) {
218 uint32_t l = s->l_values[i];
219 l_sum += l;
220 l_occ[l_base_from_value(l)]++;
221 }
222 for (uint32_t i = 0; i < s->n_matches; i++) {
223 uint32_t m = s->m_values[i];
224 m_sum += m;
225 m_occ[m_base_from_value(m)]++;
226 }
227 for (uint32_t i = 0; i < s->n_matches; i++)
228 d_occ[d_base_from_value(s->d_values[i])]++;
229 for (uint32_t i = 0; i < s->n_literals; i++)
230 literal_occ[s->literals[i]]++;
231
232 // Make sure we have enough room for a _full_ V2 header
233 if (s->dst + sizeof(lzfse_compressed_block_header_v2) > s->dst_end) {
234 ok = 0;
235 goto END;
236 }
237 header2 = (lzfse_compressed_block_header_v2 *)(s->dst);
238
239 // Setup header V1
240 header1.magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC;
241 header1.n_raw_bytes = m_sum + l_sum;
242 header1.n_matches = s->n_matches;
243 header1.n_literals = s->n_literals;
244
245 // Normalize occurrence tables to freq tables
246 fse_normalize_freq(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, l_occ,
247 header1.l_freq);
248 fse_normalize_freq(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, m_occ,
249 header1.m_freq);
250 fse_normalize_freq(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, d_occ,
251 header1.d_freq);
252 fse_normalize_freq(LZFSE_ENCODE_LITERAL_STATES, LZFSE_ENCODE_LITERAL_SYMBOLS,
253 literal_occ, header1.literal_freq);
254
255 // Compress freq tables to V2 header, and get actual size of V2 header
256 s->dst += lzfse_encode_v1_freq_table(header2, &header1);
257
258 // Initialize encoder tables from freq tables
259 fse_init_encoder_table(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS,
260 header1.l_freq, l_encoder);
261 fse_init_encoder_table(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS,
262 header1.m_freq, m_encoder);
263 fse_init_encoder_table(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS,
264 header1.d_freq, d_encoder);
265 fse_init_encoder_table(LZFSE_ENCODE_LITERAL_STATES,
266 LZFSE_ENCODE_LITERAL_SYMBOLS, header1.literal_freq,
267 literal_encoder);
268
269 // Encode literals
270 {
271 fse_out_stream out;
272 fse_out_init(&out);
273 fse_state state0, state1, state2, state3;
274 state0 = state1 = state2 = state3 = 0;
275
276 uint8_t *buf = s->dst;
277 uint32_t i = s->n_literals; // I multiple of 4
278 // We encode starting from the last literal so we can decode starting from
279 // the first
280 while (i > 0) {
281 if (buf + 16 > s->dst_end) {
282 ok = 0;
283 goto END;
284 } // out full
285 i -= 4;
286 fse_encode(&state3, literal_encoder, &out, s->literals[i + 3]); // 10b
287 fse_encode(&state2, literal_encoder, &out, s->literals[i + 2]); // 10b
288#if !FSE_IOSTREAM_64
289 fse_out_flush(&out, &buf);
290#endif
291 fse_encode(&state1, literal_encoder, &out, s->literals[i + 1]); // 10b
292 fse_encode(&state0, literal_encoder, &out, s->literals[i + 0]); // 10b
293 fse_out_flush(&out, &buf);
294 }
295 fse_out_finish(&out, &buf);
296
297 // Update header with final encoder state
298 header1.literal_bits = out.accum_nbits; // [-7, 0]
299 header1.n_literal_payload_bytes = (uint32_t)(buf - s->dst);
300 header1.literal_state[0] = state0;
301 header1.literal_state[1] = state1;
302 header1.literal_state[2] = state2;
303 header1.literal_state[3] = state3;
304
305 // Update state
306 s->dst = buf;
307
308 } // literals
309
310 // Encode L,M,D
311 {
312 fse_out_stream out;
313 fse_out_init(&out);
314 fse_state l_state, m_state, d_state;
315 l_state = m_state = d_state = 0;
316
317 uint8_t *buf = s->dst;
318 uint32_t i = s->n_matches;
319
320 // Add 8 padding bytes to the L,M,D payload
321 if (buf + 8 > s->dst_end) {
322 ok = 0;
323 goto END;
324 } // out full
325 store8(buf, 0);
326 buf += 8;
327
328 // We encode starting from the last match so we can decode starting from the
329 // first
330 while (i > 0) {
331 if (buf + 16 > s->dst_end) {
332 ok = 0;
333 goto END;
334 } // out full
335 i -= 1;
336
337 // D requires 23b max
338 int32_t d_value = s->d_values[i];
339 uint8_t d_symbol = d_base_from_value(d_value);
340 int32_t d_nbits = d_extra_bits[d_symbol];
341 int32_t d_bits = d_value - d_base_value[d_symbol];
342 fse_out_push(&out, d_nbits, d_bits);
343 fse_encode(&d_state, d_encoder, &out, d_symbol);
344#if !FSE_IOSTREAM_64
345 fse_out_flush(&out, &buf);
346#endif
347
348 // M requires 17b max
349 int32_t m_value = s->m_values[i];
350 uint8_t m_symbol = m_base_from_value(m_value);
351 int32_t m_nbits = m_extra_bits[m_symbol];
352 int32_t m_bits = m_value - m_base_value[m_symbol];
353 fse_out_push(&out, m_nbits, m_bits);
354 fse_encode(&m_state, m_encoder, &out, m_symbol);
355#if !FSE_IOSTREAM_64
356 fse_out_flush(&out, &buf);
357#endif
358
359 // L requires 14b max
360 int32_t l_value = s->l_values[i];
361 uint8_t l_symbol = l_base_from_value(l_value);
362 int32_t l_nbits = l_extra_bits[l_symbol];
363 int32_t l_bits = l_value - l_base_value[l_symbol];
364 fse_out_push(&out, l_nbits, l_bits);
365 fse_encode(&l_state, l_encoder, &out, l_symbol);
366 fse_out_flush(&out, &buf);
367 }
368 fse_out_finish(&out, &buf);
369
370 // Update header with final encoder state
371 header1.n_lmd_payload_bytes = (uint32_t)(buf - s->dst);
372 header1.lmd_bits = out.accum_nbits; // [-7, 0]
373 header1.l_state = l_state;
374 header1.m_state = m_state;
375 header1.d_state = d_state;
376
377 // Update state
378 s->dst = buf;
379
380 } // L,M,D
381
382 // Final state update, here we had enough space in DST, and are not going to
383 // revert state
384 s->n_literals = 0;
385 s->n_matches = 0;
386
387 // Final payload size
388 header1.n_payload_bytes =
389 header1.n_literal_payload_bytes + header1.n_lmd_payload_bytes;
390
391 // Encode state info in V2 header (we previously encoded the tables, now we
392 // set the other fields)
393 lzfse_encode_v1_state(header2, &header1);
394
395END:
396 if (!ok) {
397 // Revert state, DST was full
398
399 // Revert the d_prev encoding
400 uint32_t d_prev = 0;
401 for (uint32_t i = 0; i < s->n_matches; i++) {
402 uint32_t d = s->d_values[i];
403 if (d == 0)
404 s->d_values[i] = d_prev;
405 else
406 d_prev = d;
407 }
408
409 // Revert literal count
410 s->n_literals = n_literals0;
411
412 // Revert DST
413 s->dst = dst0;
414
415 return LZFSE_STATUS_DST_FULL; // DST full
416 }
417
418 return LZFSE_STATUS_OK;
419}
420
421/*! @abstract Push a L,M,D match into the STATE.
422 * @return LZFSE_STATUS_OK if OK.
423 * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of
424 * the buffers is full. In that case the state is not modified. */
425static inline int lzfse_push_lmd(lzfse_encoder_state *s, uint32_t L,
426 uint32_t M, uint32_t D) {
427 // Check if we have enough space to push the match (we add some margin to copy
428 // literals faster here, and round final count later)
429 if (s->n_matches + 1 + 8 > LZFSE_MATCHES_PER_BLOCK)
430 return LZFSE_STATUS_DST_FULL; // state full
431 if (s->n_literals + L + 16 > LZFSE_LITERALS_PER_BLOCK)
432 return LZFSE_STATUS_DST_FULL; // state full
433
434 // Store match
435 uint32_t n = s->n_matches++;
436 s->l_values[n] = L;
437 s->m_values[n] = M;
438 s->d_values[n] = D;
439
440 // Store literals
441 uint8_t *dst = s->literals + s->n_literals;
442 const uint8_t *src = s->src + s->src_literal;
443 uint8_t *dst_end = dst + L;
444 if (s->src_literal + L + 16 > s->src_end) {
445 // Careful at the end of SRC, we can't read 16 bytes
446 if (L > 0)
447 memcpy(dst, src, L);
448 } else {
449 copy16(dst, src);
450 dst += 16;
451 src += 16;
452 while (dst < dst_end) {
453 copy16(dst, src);
454 dst += 16;
455 src += 16;
456 }
457 }
458 s->n_literals += L;
459
460 // Update state
461 s->src_literal += L + M;
462
463 return LZFSE_STATUS_OK;
464}
465
466/*! @abstract Split MATCH into one or more L,M,D parts, and push to STATE.
467 * @return LZFSE_STATUS_OK if OK.
468 * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of the
469 * buffers is full. In that case the state is not modified. */
470static int lzfse_push_match(lzfse_encoder_state *s, const lzfse_match *match) {
471 // Save the initial n_matches, n_literals, src_literal
472 uint32_t n_matches0 = s->n_matches;
473 uint32_t n_literals0 = s->n_literals;
474 lzfse_offset src_literals0 = s->src_literal;
475
476 // L,M,D
477 uint32_t L = (uint32_t)(match->pos - s->src_literal); // literal count
478 uint32_t M = match->length; // match length
479 uint32_t D = (uint32_t)(match->pos - match->ref); // match distance
480 int ok = 1;
481
482 // Split L if too large
483 while (L > LZFSE_ENCODE_MAX_L_VALUE) {
484 if (lzfse_push_lmd(s, LZFSE_ENCODE_MAX_L_VALUE, 0, 1) != 0) {
485 ok = 0;
486 goto END;
487 } // take D=1 because most frequent, but not actually used
488 L -= LZFSE_ENCODE_MAX_L_VALUE;
489 }
490
491 // Split if M too large
492 while (M > LZFSE_ENCODE_MAX_M_VALUE) {
493 if (lzfse_push_lmd(s, L, LZFSE_ENCODE_MAX_M_VALUE, D) != 0) {
494 ok = 0;
495 goto END;
496 }
497 L = 0;
498 M -= LZFSE_ENCODE_MAX_M_VALUE;
499 }
500
501 // L,M in range
502 if (L > 0 || M > 0) {
503 if (lzfse_push_lmd(s, L, M, D) != 0) {
504 ok = 0;
505 goto END;
506 }
507 L = M = 0;
508 (void)L;
509 (void)M; // dead stores
510 }
511
512END:
513 if (!ok) {
514 // Revert state
515 s->n_matches = n_matches0;
516 s->n_literals = n_literals0;
517 s->src_literal = src_literals0;
518
519 return LZFSE_STATUS_DST_FULL; // state tables full
520 }
521
522 return LZFSE_STATUS_OK; // OK
523}
524
525/*! @abstract Backend: add MATCH to state S. Encode block if necessary, when
526 * state is full.
527 * @return LZFSE_STATUS_OK if OK.
528 * @return LZFSE_STATUS_DST_FULL if the match can't be added, meaning one of the
529 * buffers is full. In that case the state is not modified. */
530static int lzfse_backend_match(lzfse_encoder_state *s,
531 const lzfse_match *match) {
532 // Try to push the match in state
533 if (lzfse_push_match(s, match) == LZFSE_STATUS_OK)
534 return LZFSE_STATUS_OK; // OK, match added to state
535
536 // Here state tables are full, try to emit block
537 if (lzfse_encode_matches(s) != LZFSE_STATUS_OK)
538 return LZFSE_STATUS_DST_FULL; // DST full, match not added
539
540 // Here block has been emitted, re-try to push the match in state
541 return lzfse_push_match(s, match);
542}
543
544/*! @abstract Backend: add L literals to state S. Encode block if necessary,
545 * when state is full.
546 * @return LZFSE_STATUS_OK if OK.
547 * @return LZFSE_STATUS_DST_FULL if the literals can't be added, meaning one of
548 * the buffers is full. In that case the state is not modified. */
549static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) {
550 // Create a fake match with M=0, D=1
551 lzfse_match match;
552 lzfse_offset pos = s->src_literal + L;
553 match.pos = pos;
554 match.ref = match.pos - 1;
555 match.length = 0;
556 return lzfse_backend_match(s, &match);
557}
558
559/*! @abstract Backend: flush final block, and emit end of stream
560 * @return LZFSE_STATUS_OK if OK.
561 * @return LZFSE_STATUS_DST_FULL if either the final block, or the end-of-stream
562 * can't be added, meaning one of the buffers is full. If the block was emitted,
563 * the state is updated to reflect this. Otherwise, it is left unchanged. */
564static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) {
565 // Final match triggers write, otherwise emit blocks when we have enough
566 // matches stored
567 if (lzfse_encode_matches(s) != LZFSE_STATUS_OK)
568 return LZFSE_STATUS_DST_FULL; // DST full
569
570 // Emit end-of-stream block
571 if (s->dst + 4 > s->dst_end)
572 return LZFSE_STATUS_DST_FULL; // DST full
573 store4(s->dst, LZFSE_ENDOFSTREAM_BLOCK_MAGIC);
574 s->dst += 4;
575
576 return LZFSE_STATUS_OK; // OK
577}
578
579// ===============================================================
580// Encoder state management
581
582/*! @abstract Initialize state:
583 * @code
584 * - hash table with all invalid pos, and value 0.
585 * - pending match to NO_MATCH.
586 * - src_literal to 0.
587 * - d_prev to 0.
588 @endcode
589 * @return LZFSE_STATUS_OK */
590int lzfse_encode_init(lzfse_encoder_state *s) {
591 const lzfse_match NO_MATCH = {0};
592 lzfse_history_set line;
593 for (int i = 0; i < LZFSE_ENCODE_HASH_WIDTH; i++) {
594 line.pos[i] = -4 * LZFSE_ENCODE_MAX_D_VALUE; // invalid pos
595 line.value[i] = 0;
596 }
597 // Fill table
598 for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++)
599 s->history_table[i] = line;
600 s->pending = NO_MATCH;
601 s->src_literal = 0;
602
603 return LZFSE_STATUS_OK; // OK
604}
605
606/*! @abstract Translate state \p src forward by \p delta > 0.
607 * Offsets in \p src are updated backwards to point to the same positions.
608 * @return LZFSE_STATUS_OK */
609int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta) {
610 assert(delta >= 0);
611 if (delta == 0)
612 return LZFSE_STATUS_OK; // OK
613
614 // SRC
615 s->src += delta;
616
617 // Offsets in SRC
618 s->src_end -= delta;
619 s->src_encode_i -= delta;
620 s->src_encode_end -= delta;
621 s->src_literal -= delta;
622
623 // Pending match
624 s->pending.pos -= delta;
625 s->pending.ref -= delta;
626
627 // history_table positions, translated, and clamped to invalid pos
628 int32_t invalidPos = -4 * LZFSE_ENCODE_MAX_D_VALUE;
629 for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) {
630 int32_t *p = &(s->history_table[i].pos[0]);
631 for (int j = 0; j < LZFSE_ENCODE_HASH_WIDTH; j++) {
632 lzfse_offset newPos = p[j] - delta; // translate
633 p[j] = (int32_t)((newPos < invalidPos) ? invalidPos : newPos); // clamp
634 }
635 }
636
637 return LZFSE_STATUS_OK; // OK
638}
639
640// ===============================================================
641// Encoder front end
642
643int lzfse_encode_base(lzfse_encoder_state *s) {
644 lzfse_history_set *history_table = s->history_table;
645 lzfse_history_set *hashLine = 0;
646 lzfse_history_set newH;
647 const lzfse_match NO_MATCH = {0};
648 int ok = 1;
649
650 memset(&newH, 0x00, sizeof(newH));
651
652 // 8 byte padding at end of buffer
653 s->src_encode_end = s->src_end - 8;
654 for (; s->src_encode_i < s->src_encode_end; s->src_encode_i++) {
655 lzfse_offset pos = s->src_encode_i; // pos >= 0
656
657 // Load 4 byte value and get hash line
658 uint32_t x = load4(s->src + pos);
659 hashLine = history_table + hashX(x);
660 lzfse_history_set h = *hashLine;
661
662 // Prepare next hash line (component 0 is the most recent) to prepare new
663 // entries (stored later)
664 {
665 newH.pos[0] = (int32_t)pos;
666 for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++)
667 newH.pos[k + 1] = h.pos[k];
668 newH.value[0] = x;
669 for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++)
670 newH.value[k + 1] = h.value[k];
671 }
672
673 // Do not look for a match if we are still covered by a previous match
674 if (pos < s->src_literal)
675 goto END_POS;
676
677 // Search best incoming match
678 lzfse_match incoming = {.pos = pos, .ref = 0, .length = 0};
679
680 // Check for matches. We consider matches of length >= 4 only.
681 for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH; k++) {
682 uint32_t d = h.value[k] ^ x;
683 if (d)
684 continue; // no 4 byte match
685 int32_t ref = h.pos[k];
686 if (ref + LZFSE_ENCODE_MAX_D_VALUE < pos)
687 continue; // too far
688
689 const uint8_t *src_ref = s->src + ref;
690 const uint8_t *src_pos = s->src + pos;
691 uint32_t length = 4;
692 uint32_t maxLength =
693 (uint32_t)(s->src_end - pos - 8); // ensure we don't hit the end of SRC
694 while (length < maxLength) {
695 uint64_t d = load8(src_ref + length) ^ load8(src_pos + length);
696 if (d == 0) {
697 length += 8;
698 continue;
699 }
700
701 length +=
702 (__builtin_ctzll(d) >> 3); // ctzll must be called only with D != 0
703 break;
704 }
705 if (length > incoming.length) {
706 incoming.length = length;
707 incoming.ref = ref;
708 } // keep if longer
709 }
710
711 // No incoming match?
712 if (incoming.length == 0) {
713 // We may still want to emit some literals here, to not lag too far behind
714 // the current search point, and avoid
715 // ending up with a literal block not fitting in the state.
716 lzfse_offset n_literals = pos - s->src_literal;
717 // The threshold here should be larger than a couple of MAX_L_VALUE, and
718 // much smaller than LITERALS_PER_BLOCK
719 if (n_literals > 8 * LZFSE_ENCODE_MAX_L_VALUE) {
720 // Here, we need to consume some literals. Emit pending match if there
721 // is one
722 if (s->pending.length > 0) {
723 if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) {
724 ok = 0;
725 goto END;
726 }
727 s->pending = NO_MATCH;
728 } else {
729 // No pending match, emit a full LZFSE_ENCODE_MAX_L_VALUE block of
730 // literals
731 if (lzfse_backend_literals(s, LZFSE_ENCODE_MAX_L_VALUE) !=
732 LZFSE_STATUS_OK) {
733 ok = 0;
734 goto END;
735 }
736 }
737 }
738 goto END_POS; // no incoming match
739 }
740
741 // Limit match length (it may still be expanded backwards, but this is
742 // bounded by the limit on literals we tested before)
743 if (incoming.length > LZFSE_ENCODE_MAX_MATCH_LENGTH) {
744 incoming.length = LZFSE_ENCODE_MAX_MATCH_LENGTH;
745 }
746
747 // Expand backwards (since this is expensive, we do this for the best match
748 // only)
749 while (incoming.pos > s->src_literal && incoming.ref > 0 &&
750 s->src[incoming.ref - 1] == s->src[incoming.pos - 1]) {
751 incoming.pos--;
752 incoming.ref--;
753 }
754 incoming.length += pos - incoming.pos; // update length after expansion
755
756 // Match filtering heuristic (from LZVN). INCOMING is always defined here.
757
758 // Incoming is 'good', emit incoming
759 if (incoming.length >= LZFSE_ENCODE_GOOD_MATCH) {
760 if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) {
761 ok = 0;
762 goto END;
763 }
764 s->pending = NO_MATCH;
765 goto END_POS;
766 }
767
768 // No pending, keep incoming
769 if (s->pending.length == 0) {
770 s->pending = incoming;
771 goto END_POS;
772 }
773
774 // No overlap, emit pending, keep incoming
775 if (s->pending.pos + s->pending.length <= incoming.pos) {
776 if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) {
777 ok = 0;
778 goto END;
779 }
780 s->pending = incoming;
781 goto END_POS;
782 }
783
784 // Overlap: emit longest
785 if (incoming.length > s->pending.length) {
786 if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) {
787 ok = 0;
788 goto END;
789 }
790 } else {
791 if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) {
792 ok = 0;
793 goto END;
794 }
795 }
796 s->pending = NO_MATCH;
797
798 END_POS:
799 // We are done with this src_encode_i.
800 // Update state now (s->pending has already been updated).
801 *hashLine = newH;
802 }
803
804END:
805 return ok ? LZFSE_STATUS_OK : LZFSE_STATUS_DST_FULL;
806}
807
808int lzfse_encode_finish(lzfse_encoder_state *s) {
809 const lzfse_match NO_MATCH = {0};
810
811 // Emit pending match
812 if (s->pending.length > 0) {
813 if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK)
814 return LZFSE_STATUS_DST_FULL;
815 s->pending = NO_MATCH;
816 }
817
818 // Emit final literals if any
819 lzfse_offset L = s->src_end - s->src_literal;
820 if (L > 0) {
821 if (lzfse_backend_literals(s, L) != LZFSE_STATUS_OK)
822 return LZFSE_STATUS_DST_FULL;
823 }
824
825 // Emit all matches, and end-of-stream block
826 if (lzfse_backend_end_of_stream(s) != LZFSE_STATUS_OK)
827 return LZFSE_STATUS_DST_FULL;
828
829 return LZFSE_STATUS_OK;
830}
831