1/*
2Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
61. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
82. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9 in the documentation and/or other materials provided with the distribution.
10
113. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12 from this software without specific prior written permission.
13
14THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20*/
21
22#ifndef LZFSE_INTERNAL_H
23#define LZFSE_INTERNAL_H
24
25// Unlike the tunable parameters defined in lzfse_tunables.h, you probably
26// should not modify the values defined in this header. Doing so will either
27// break the compressor, or result in a compressed data format that is
28// incompatible.
29
30#include "lzfse_fse.h"
31#include "lzfse_tunables.h"
32#include <limits.h>
33#include <stddef.h>
34#include <stdint.h>
35
36// Throughout LZFSE we refer to "L", "M" and "D"; these will always appear as
37// a triplet, and represent a "usual" LZ-style literal and match pair. "L"
38// is the number of literal bytes, "M" is the number of match bytes, and "D"
39// is the match "distance"; the distance in bytes between the current pointer
40// and the start of the match.
41#define LZFSE_ENCODE_HASH_VALUES (1 << LZFSE_ENCODE_HASH_BITS)
42#define LZFSE_ENCODE_L_SYMBOLS 20
43#define LZFSE_ENCODE_M_SYMBOLS 20
44#define LZFSE_ENCODE_D_SYMBOLS 64
45#define LZFSE_ENCODE_LITERAL_SYMBOLS 256
46#define LZFSE_ENCODE_L_STATES 64
47#define LZFSE_ENCODE_M_STATES 64
48#define LZFSE_ENCODE_D_STATES 256
49#define LZFSE_ENCODE_LITERAL_STATES 1024
50#define LZFSE_MATCHES_PER_BLOCK 10000
51#define LZFSE_LITERALS_PER_BLOCK (4 * LZFSE_MATCHES_PER_BLOCK)
52#define LZFSE_DECODE_LITERALS_PER_BLOCK (4 * LZFSE_DECODE_MATCHES_PER_BLOCK)
53
54// LZFSE internal status. These values are used by internal LZFSE routines
55// as return codes. There should not be any good reason to change their
56// values; it is plausible that additional codes might be added in the
57// future.
58#define LZFSE_STATUS_OK 0
59#define LZFSE_STATUS_SRC_EMPTY -1
60#define LZFSE_STATUS_DST_FULL -2
61#define LZFSE_STATUS_ERROR -3
62
63// Type representing an offset between elements in a buffer. On 64-bit
64// systems, this is stored in a 64-bit container to avoid extra sign-
65// extension operations in addressing arithmetic, but the value is always
66// representable as a 32-bit signed value in LZFSE's usage.
67#if defined __x86_64__ || defined __arm64__
68typedef int64_t lzfse_offset;
69#else
70typedef int32_t lzfse_offset;
71#endif
72
73/*! @abstract History table set. Each line of the history table represents a set
74 * of candidate match locations, each of which begins with four bytes with the
75 * same hash. The table contains not only the positions, but also the first
76 * four bytes at each position. This doubles the memory footprint of the
77 * table, but allows us to quickly eliminate false-positive matches without
78 * doing any pointer chasing and without pulling in any additional cachelines.
79 * This provides a large performance win in practice. */
80typedef struct {
81 int32_t pos[LZFSE_ENCODE_HASH_WIDTH];
82 uint32_t value[LZFSE_ENCODE_HASH_WIDTH];
83} lzfse_history_set;
84
85/*! @abstract An lzfse match is a sequence of bytes in the source buffer that
86 * exactly matches an earlier (but possibly overlapping) sequence of bytes in
87 * the same buffer.
88 * @code
89 * exeMPLARYexaMPLE
90 * | | | ||-|--- lzfse_match2.length=3
91 * | | | ||----- lzfse_match2.pos
92 * | | |-|------ lzfse_match1.length=3
93 * | | |-------- lzfse_match1.pos
94 * | |-------------- lzfse_match2.ref
95 * |----------------- lzfse_match1.ref
96 * @endcode
97 */
98typedef struct {
99 // Offset of the first byte in the match.
100 lzfse_offset pos;
101 // First byte of the source -- the earlier location in the buffer with the
102 // same contents.
103 lzfse_offset ref;
104 // Length of the match.
105 uint32_t length;
106} lzfse_match;
107
108#pragma mark - Encoder and Decoder state objects
109
110/*! @abstract Encoder state object. */
111typedef struct {
112 // Pointer to first byte of the source buffer.
113 const uint8_t *src;
114 // Length of the source buffer in bytes. Note that this is not a size_t,
115 // but rather lzfse_offset, which is a signed type. The largest
116 // representable buffer is 2GB, but arbitrarily large buffers may be
117 // handled by repeatedly calling the encoder function and "translating"
118 // the state between calls. When doing this, it is beneficial to use
119 // blocks smaller than 2GB in order to maintain residency in the last-level
120 // cache. Consult the implementation of lzfse_encode_buffer for details.
121 lzfse_offset src_end;
122 // Offset of the first byte of the next literal to encode in the source
123 // buffer.
124 lzfse_offset src_literal;
125 // Offset of the byte currently being checked for a match.
126 lzfse_offset src_encode_i;
127 // The last byte offset to consider for a match. In some uses it makes
128 // sense to use a smaller offset than src_end.
129 lzfse_offset src_encode_end;
130 // Pointer to the next byte to be written in the destination buffer.
131 uint8_t *dst;
132 // Pointer to the first byte of the destination buffer.
133 uint8_t *dst_begin;
134 // Pointer to one byte past the end of the destination buffer.
135 uint8_t *dst_end;
136 // Pending match; will be emitted unless a better match is found.
137 lzfse_match pending;
138 // The number of matches written so far. Note that there is no problem in
139 // using a 32-bit field for this quantity, because the state already limits
140 // us to at most 2GB of data; there cannot possibly be more matches than
141 // there are bytes in the input.
142 uint32_t n_matches;
143 // The number of literals written so far.
144 uint32_t n_literals;
145 // Lengths of found literals.
146 uint32_t l_values[LZFSE_MATCHES_PER_BLOCK];
147 // Lengths of found matches.
148 uint32_t m_values[LZFSE_MATCHES_PER_BLOCK];
149 // Distances of found matches.
150 uint32_t d_values[LZFSE_MATCHES_PER_BLOCK];
151 // Concatenated literal bytes.
152 uint8_t literals[LZFSE_LITERALS_PER_BLOCK];
153 // History table used to search for matches. Each entry of the table
154 // corresponds to a group of four byte sequences in the input stream
155 // that hash to the same value.
156 lzfse_history_set history_table[LZFSE_ENCODE_HASH_VALUES];
157} lzfse_encoder_state;
158
159/*! @abstract Decoder state object for lzfse compressed blocks. */
160typedef struct {
161 // Number of matches remaining in the block.
162 uint32_t n_matches;
163 // Number of bytes used to encode L, M, D triplets for the block.
164 uint32_t n_lmd_payload_bytes;
165 // Pointer to the next literal to emit.
166 const uint8_t *current_literal;
167 // L, M, D triplet for the match currently being emitted. This is used only
168 // if we need to restart after reaching the end of the destination buffer in
169 // the middle of a literal or match.
170 int32_t l_value, m_value, d_value;
171 // FSE stream object.
172 fse_in_stream lmd_in_stream;
173 // Offset of L,M,D encoding in the input buffer. Because we read through an
174 // FSE stream *backwards* while decoding, this is decremented as we move
175 // through a block.
176 uint32_t lmd_in_buf;
177 // The current state of the L, M, and D FSE decoders.
178 uint16_t l_state, m_state, d_state;
179 // Internal FSE decoder tables for the current block. These have
180 // alignment forced to 8 bytes to guarantee that a single state's
181 // entry cannot span two cachelines.
182 fse_value_decoder_entry l_decoder[LZFSE_ENCODE_L_STATES] __attribute__((__aligned__(8)));
183 fse_value_decoder_entry m_decoder[LZFSE_ENCODE_M_STATES] __attribute__((__aligned__(8)));
184 fse_value_decoder_entry d_decoder[LZFSE_ENCODE_D_STATES] __attribute__((__aligned__(8)));
185 int32_t literal_decoder[LZFSE_ENCODE_LITERAL_STATES];
186 // The literal stream for the block, plus padding to allow for faster copy
187 // operations.
188 uint8_t literals[LZFSE_LITERALS_PER_BLOCK + 64];
189} lzfse_compressed_block_decoder_state;
190
191// Decoder state object for uncompressed blocks.
192typedef struct { uint32_t n_raw_bytes; } uncompressed_block_decoder_state;
193
194/*! @abstract Decoder state object for lzvn-compressed blocks. */
195typedef struct {
196 uint32_t n_raw_bytes;
197 uint32_t n_payload_bytes;
198 uint32_t d_prev;
199} lzvn_compressed_block_decoder_state;
200
201/*! @abstract Decoder state object. */
202typedef struct {
203 // Pointer to next byte to read from source buffer (this is advanced as we
204 // decode; src_begin describe the buffer and do not change).
205 const uint8_t *src;
206 // Pointer to first byte of source buffer.
207 const uint8_t *src_begin;
208 // Pointer to one byte past the end of the source buffer.
209 const uint8_t *src_end;
210 // Pointer to the next byte to write to destination buffer (this is advanced
211 // as we decode; dst_begin and dst_end describe the buffer and do not change).
212 uint8_t *dst;
213 // Pointer to first byte of destination buffer.
214 uint8_t *dst_begin;
215 // Pointer to one byte past the end of the destination buffer.
216 uint8_t *dst_end;
217 // 1 if we have reached the end of the stream, 0 otherwise.
218 int end_of_stream;
219 // magic number of the current block if we are within a block,
220 // LZFSE_NO_BLOCK_MAGIC otherwise.
221 uint32_t block_magic;
222 lzfse_compressed_block_decoder_state compressed_lzfse_block_state;
223 lzvn_compressed_block_decoder_state compressed_lzvn_block_state;
224 uncompressed_block_decoder_state uncompressed_block_state;
225} lzfse_decoder_state;
226
227#pragma mark - Block header objects
228
229#define LZFSE_NO_BLOCK_MAGIC 0x00000000 // 0 (invalid)
230#define LZFSE_ENDOFSTREAM_BLOCK_MAGIC 0x24787662 // bvx$ (end of stream)
231#define LZFSE_UNCOMPRESSED_BLOCK_MAGIC 0x2d787662 // bvx- (raw data)
232#define LZFSE_COMPRESSEDV1_BLOCK_MAGIC 0x31787662 // bvx1 (lzfse compressed, uncompressed tables)
233#define LZFSE_COMPRESSEDV2_BLOCK_MAGIC 0x32787662 // bvx2 (lzfse compressed, compressed tables)
234#define LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC 0x6e787662 // bvxn (lzvn compressed)
235
236/*! @abstract Uncompressed block header in encoder stream. */
237typedef struct {
238 // Magic number, always LZFSE_UNCOMPRESSED_BLOCK_MAGIC.
239 uint32_t magic;
240 // Number of raw bytes in block.
241 uint32_t n_raw_bytes;
242} uncompressed_block_header;
243
244/*! @abstract Compressed block header with uncompressed tables. */
245typedef struct {
246 // Magic number, always LZFSE_COMPRESSEDV1_BLOCK_MAGIC.
247 uint32_t magic;
248 // Number of decoded (output) bytes in block.
249 uint32_t n_raw_bytes;
250 // Number of encoded (source) bytes in block.
251 uint32_t n_payload_bytes;
252 // Number of literal bytes output by block (*not* the number of literals).
253 uint32_t n_literals;
254 // Number of matches in block (which is also the number of literals).
255 uint32_t n_matches;
256 // Number of bytes used to encode literals.
257 uint32_t n_literal_payload_bytes;
258 // Number of bytes used to encode matches.
259 uint32_t n_lmd_payload_bytes;
260
261 // Final encoder states for the block, which will be the initial states for
262 // the decoder:
263 // Final accum_nbits for literals stream.
264 int32_t literal_bits;
265 // There are four interleaved streams of literals, so there are four final
266 // states.
267 uint16_t literal_state[4];
268 // accum_nbits for the l, m, d stream.
269 int32_t lmd_bits;
270 // Final L (literal length) state.
271 uint16_t l_state;
272 // Final M (match length) state.
273 uint16_t m_state;
274 // Final D (match distance) state.
275 uint16_t d_state;
276
277 // Normalized frequency tables for each stream. Sum of values in each
278 // array is the number of states.
279 uint16_t l_freq[LZFSE_ENCODE_L_SYMBOLS];
280 uint16_t m_freq[LZFSE_ENCODE_M_SYMBOLS];
281 uint16_t d_freq[LZFSE_ENCODE_D_SYMBOLS];
282 uint16_t literal_freq[LZFSE_ENCODE_LITERAL_SYMBOLS];
283} lzfse_compressed_block_header_v1;
284
285/*! @abstract Compressed block header with compressed tables. Note that because
286 * freq[] is compressed, the structure-as-stored-in-the-stream is *truncated*;
287 * we only store the used bytes of freq[]. This means that some extra care must
288 * be taken when reading one of these headers from the stream. */
289typedef struct {
290 // Magic number, always LZFSE_COMPRESSEDV2_BLOCK_MAGIC.
291 uint32_t magic;
292 // Number of decoded (output) bytes in block.
293 uint32_t n_raw_bytes;
294 // The fields n_payload_bytes ... d_state from the
295 // lzfse_compressed_block_header_v1 object are packed into three 64-bit
296 // fields in the compressed header, as follows:
297 //
298 // offset bits value
299 // 0 20 n_literals
300 // 20 20 n_literal_payload_bytes
301 // 40 20 n_matches
302 // 60 3 literal_bits
303 // 63 1 --- unused ---
304 //
305 // 0 10 literal_state[0]
306 // 10 10 literal_state[1]
307 // 20 10 literal_state[2]
308 // 30 10 literal_state[3]
309 // 40 20 n_lmd_payload_bytes
310 // 60 3 lmd_bits
311 // 63 1 --- unused ---
312 //
313 // 0 32 header_size (total header size in bytes; this does not
314 // correspond to a field in the uncompressed header version,
315 // but is required; we wouldn't know the size of the
316 // compresssed header otherwise.
317 // 32 10 l_state
318 // 42 10 m_state
319 // 52 10 d_state
320 // 62 2 --- unused ---
321 uint64_t packed_fields[3];
322 // Variable size freq tables, using a Huffman-style fixed encoding.
323 // Size allocated here is an upper bound (all values stored on 16 bits).
324 uint8_t freq[2 * (LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS +
325 LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS)];
326} __attribute__((__packed__, __aligned__(1)))
327lzfse_compressed_block_header_v2;
328
329/*! @abstract LZVN compressed block header. */
330typedef struct {
331 // Magic number, always LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC.
332 uint32_t magic;
333 // Number of decoded (output) bytes.
334 uint32_t n_raw_bytes;
335 // Number of encoded (source) bytes.
336 uint32_t n_payload_bytes;
337} lzvn_compressed_block_header;
338
339#pragma mark - LZFSE encode/decode interfaces
340
341int lzfse_encode_init(lzfse_encoder_state *s);
342int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta);
343int lzfse_encode_base(lzfse_encoder_state *s);
344int lzfse_encode_finish(lzfse_encoder_state *s);
345int lzfse_decode(lzfse_decoder_state *s);
346
347#pragma mark - LZVN encode/decode interfaces
348
349// Minimum source buffer size for compression. Smaller buffers will not be
350// compressed; the lzvn encoder will simply return.
351#define LZVN_ENCODE_MIN_SRC_SIZE ((size_t)8)
352
353// Maximum source buffer size for compression. Larger buffers will be
354// compressed partially.
355#define LZVN_ENCODE_MAX_SRC_SIZE ((size_t)0xffffffffU)
356
357// Minimum destination buffer size for compression. No compression will take
358// place if smaller.
359#define LZVN_ENCODE_MIN_DST_SIZE ((size_t)8)
360
361size_t lzvn_decode_scratch_size(void);
362size_t lzvn_encode_scratch_size(void);
363size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size,
364 const void *__restrict src, size_t src_size,
365 void *__restrict work);
366size_t lzvn_decode_buffer(void *__restrict dst, size_t dst_size,
367 const void *__restrict src, size_t src_size);
368
369/*! @abstract Signed offset in buffers, stored on either 32 or 64 bits. */
370#if defined(__x86_64__) || defined(__arm64__)
371typedef int64_t lzvn_offset;
372#else
373typedef int32_t lzvn_offset;
374#endif
375
376#pragma mark - LZFSE utility functions
377
378#define LZFSE_INLINE static inline __attribute__((__always_inline__))
379
380/*! @abstract Load bytes from memory location SRC. */
381LZFSE_INLINE uint16_t load2(const void *ptr) {
382 uint16_t data;
383 memcpy(&data, ptr, sizeof data);
384 return data;
385}
386
387LZFSE_INLINE uint32_t load4(const void *ptr) {
388 uint32_t data;
389 memcpy(&data, ptr, sizeof data);
390 return data;
391}
392
393LZFSE_INLINE uint64_t load8(const void *ptr) {
394 uint64_t data;
395 memcpy(&data, ptr, sizeof data);
396 return data;
397}
398
399/*! @abstract Store bytes to memory location DST. */
400LZFSE_INLINE void store2(void *ptr, uint16_t data) {
401 memcpy(ptr, &data, sizeof data);
402}
403
404LZFSE_INLINE void store4(void *ptr, uint32_t data) {
405 memcpy(ptr, &data, sizeof data);
406}
407
408LZFSE_INLINE void store8(void *ptr, uint64_t data) {
409 memcpy(ptr, &data, sizeof data);
410}
411
412/*! @abstract Load+store bytes from locations SRC to DST. Not intended for use
413 * with overlapping buffers. Note that for LZ-style compression, you need
414 * copies to behave like naive memcpy( ) implementations do, splatting the
415 * leading sequence if the buffers overlap. This copy does not do that, so
416 * should not be used with overlapping buffers. */
417LZFSE_INLINE void copy8(void *dst, const void *src) { store8(dst, load8(src)); }
418LZFSE_INLINE void copy16(void *dst, const void *src) {
419 uint64_t m0 = load8(src);
420 uint64_t m1 = load8(src + 8);
421 store8(dst, m0);
422 store8(dst + 8, m1);
423}
424
425// ===============================================================
426// Bitfield Operations
427
428/*! @abstract Extracts \p width bits from \p container, starting with \p lsb; if
429 * we view \p container as a bit array, we extract \c container[lsb:lsb+width]. */
430LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb,
431 unsigned width) {
432 static const size_t container_width = sizeof container * 8;
433 assert(lsb < container_width);
434 assert(width > 0 && width <= container_width);
435 assert(lsb + width <= container_width);
436 if (width == container_width)
437 return container;
438 return (container >> lsb) & (((uintmax_t)1 << width) - 1);
439}
440
441/*! @abstract Inserts \p width bits from \p data into \p container, starting with \p lsb.
442 * Viewed as bit arrays, the operations is:
443 * @code
444 * container[:lsb] is unchanged
445 * container[lsb:lsb+width] <-- data[0:width]
446 * container[lsb+width:] is unchanged
447 * @endcode
448 */
449LZFSE_INLINE uintmax_t insert(uintmax_t container, uintmax_t data, unsigned lsb,
450 unsigned width) {
451 static const size_t container_width = sizeof container * 8;
452 assert(lsb < container_width);
453 assert(width > 0 && width <= container_width);
454 assert(lsb + width <= container_width);
455 if (width == container_width)
456 return container;
457 uintmax_t mask = ((uintmax_t)1 << width) - 1;
458 return (container & ~(mask << lsb)) | (data & mask) << lsb;
459}
460
461/*! @abstract Perform sanity checks on the values of lzfse_compressed_block_header_v1.
462 * Test that the field values are in the allowed limits, verify that the
463 * frequency tables sum to value less than total number of states.
464 * @return 0 if all tests passed.
465 * @return negative error code with 1 bit set for each failed test. */
466LZFSE_INLINE int lzfse_check_block_header_v1(
467 const lzfse_compressed_block_header_v1 *header) {
468 int tests_results = 0;
469 tests_results =
470 tests_results |
471 ((header->magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC) ? 0 : (1 << 0));
472 tests_results =
473 tests_results |
474 ((header->n_literals <= LZFSE_LITERALS_PER_BLOCK) ? 0 : (1 << 1));
475 tests_results =
476 tests_results |
477 ((header->n_matches <= LZFSE_MATCHES_PER_BLOCK) ? 0 : (1 << 2));
478
479 uint16_t literal_state[4];
480 memcpy(literal_state, header->literal_state, sizeof(uint16_t) * 4);
481
482 tests_results =
483 tests_results |
484 ((literal_state[0] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 3));
485 tests_results =
486 tests_results |
487 ((literal_state[1] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 4));
488 tests_results =
489 tests_results |
490 ((literal_state[2] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 5));
491 tests_results =
492 tests_results |
493 ((literal_state[3] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 6));
494
495 tests_results = tests_results |
496 ((header->l_state < LZFSE_ENCODE_L_STATES) ? 0 : (1 << 7));
497 tests_results = tests_results |
498 ((header->m_state < LZFSE_ENCODE_M_STATES) ? 0 : (1 << 8));
499 tests_results = tests_results |
500 ((header->d_state < LZFSE_ENCODE_D_STATES) ? 0 : (1 << 9));
501
502 int res;
503 res = fse_check_freq(header->l_freq, LZFSE_ENCODE_L_SYMBOLS,
504 LZFSE_ENCODE_L_STATES);
505 tests_results = tests_results | ((res == 0) ? 0 : (1 << 10));
506 res = fse_check_freq(header->m_freq, LZFSE_ENCODE_M_SYMBOLS,
507 LZFSE_ENCODE_M_STATES);
508 tests_results = tests_results | ((res == 0) ? 0 : (1 << 11));
509 res = fse_check_freq(header->d_freq, LZFSE_ENCODE_D_SYMBOLS,
510 LZFSE_ENCODE_D_STATES);
511 tests_results = tests_results | ((res == 0) ? 0 : (1 << 12));
512 res = fse_check_freq(header->literal_freq, LZFSE_ENCODE_LITERAL_SYMBOLS,
513 LZFSE_ENCODE_LITERAL_STATES);
514 tests_results = tests_results | ((res == 0) ? 0 : (1 << 13));
515
516 if (tests_results) {
517 return tests_results | 0x80000000; // each 1 bit is a test that failed
518 // (except for the sign bit)
519 }
520
521 return 0; // OK
522}
523
524#pragma mark - L, M, D encoding constants for LZFSE
525
526// Largest encodable L (literal length), M (match length) and D (match
527// distance) values.
528#define LZFSE_ENCODE_MAX_L_VALUE 315
529#define LZFSE_ENCODE_MAX_M_VALUE 2359
530#define LZFSE_ENCODE_MAX_D_VALUE 262139
531
532/*! @abstract The L, M, D data streams are all encoded as a "base" value, which is
533 * FSE-encoded, and an "extra bits" value, which is the difference between
534 * value and base, and is simply represented as a raw bit value (because it
535 * is the low-order bits of a larger number, not much entropy can be
536 * extracted from these bits by more complex encoding schemes). The following
537 * tables represent the number of low-order bits to encode separately and the
538 * base values for each of L, M, and D.
539 *
540 * @note The inverse tables for mapping the other way are significantly larger.
541 * Those tables have been split out to lzfse_encode_tables.h in order to keep
542 * this file relatively small. */
543static uint8_t l_extra_bits[LZFSE_ENCODE_L_SYMBOLS] = {
544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8
545};
546static int32_t l_base_value[LZFSE_ENCODE_L_SYMBOLS] = {
547 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60
548};
549static uint8_t m_extra_bits[LZFSE_ENCODE_M_SYMBOLS] = {
550 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11
551};
552static int32_t m_base_value[LZFSE_ENCODE_M_SYMBOLS] = {
553 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312
554};
555static uint8_t d_extra_bits[LZFSE_ENCODE_D_SYMBOLS] = {
556 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
557 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
558 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
559 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15
560};
561static int32_t d_base_value[LZFSE_ENCODE_D_SYMBOLS] = {
562 0, 1, 2, 3, 4, 6, 8, 10, 12, 16,
563 20, 24, 28, 36, 44, 52, 60, 76, 92, 108,
564 124, 156, 188, 220, 252, 316, 380, 444, 508, 636,
565 764, 892, 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580,
566 4092, 5116, 6140, 7164, 8188, 10236, 12284, 14332, 16380, 20476,
567 24572, 28668, 32764, 40956, 49148, 57340, 65532, 81916, 98300, 114684,
568 131068, 163836, 196604, 229372
569};
570
571#endif // LZFSE_INTERNAL_H
572