1/*
2Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
61. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
82. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9 in the documentation and/or other materials provided with the distribution.
10
113. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12 from this software without specific prior written permission.
13
14THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20*/
21
22// Finite state entropy coding (FSE)
23// This is an implementation of the tANS algorithm described by Jarek Duda,
24// we use the more descriptive name "Finite State Entropy".
25
26#pragma once
27
28#include <assert.h>
29#include <stddef.h>
30#include <stdint.h>
31#include <stdlib.h>
32#include <string.h>
33
34// Select between 32/64-bit I/O streams for FSE. Note that the FSE stream
35// size need not match the word size of the machine, but in practice you
36// want to use 64b streams on 64b systems for better performance.
37#if defined __x86_64__ || defined __arm64__
38#define FSE_IOSTREAM_64 1
39#else
40#define FSE_IOSTREAM_64 0
41#endif
42
43#define FSE_INLINE static inline __attribute__((__always_inline__))
44
45#pragma mark - Bit utils
46
47/*! @abstract Signed type used to represent bit count. */
48typedef int32_t fse_bit_count;
49
50/*! @abstract Unsigned type used to represent FSE state. */
51typedef uint16_t fse_state;
52
53// Mask the NBITS lsb of X. 0 <= NBITS < 64
54static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) {
55 static const uint64_t mtable[65] = {
56 0x0000000000000000LLU, 0x0000000000000001LLU, 0x0000000000000003LLU,
57 0x0000000000000007LLU, 0x000000000000000fLLU, 0x000000000000001fLLU,
58 0x000000000000003fLLU, 0x000000000000007fLLU, 0x00000000000000ffLLU,
59 0x00000000000001ffLLU, 0x00000000000003ffLLU, 0x00000000000007ffLLU,
60 0x0000000000000fffLLU, 0x0000000000001fffLLU, 0x0000000000003fffLLU,
61 0x0000000000007fffLLU, 0x000000000000ffffLLU, 0x000000000001ffffLLU,
62 0x000000000003ffffLLU, 0x000000000007ffffLLU, 0x00000000000fffffLLU,
63 0x00000000001fffffLLU, 0x00000000003fffffLLU, 0x00000000007fffffLLU,
64 0x0000000000ffffffLLU, 0x0000000001ffffffLLU, 0x0000000003ffffffLLU,
65 0x0000000007ffffffLLU, 0x000000000fffffffLLU, 0x000000001fffffffLLU,
66 0x000000003fffffffLLU, 0x000000007fffffffLLU, 0x00000000ffffffffLLU,
67 0x00000001ffffffffLLU, 0x00000003ffffffffLLU, 0x00000007ffffffffLLU,
68 0x0000000fffffffffLLU, 0x0000001fffffffffLLU, 0x0000003fffffffffLLU,
69 0x0000007fffffffffLLU, 0x000000ffffffffffLLU, 0x000001ffffffffffLLU,
70 0x000003ffffffffffLLU, 0x000007ffffffffffLLU, 0x00000fffffffffffLLU,
71 0x00001fffffffffffLLU, 0x00003fffffffffffLLU, 0x00007fffffffffffLLU,
72 0x0000ffffffffffffLLU, 0x0001ffffffffffffLLU, 0x0003ffffffffffffLLU,
73 0x0007ffffffffffffLLU, 0x000fffffffffffffLLU, 0x001fffffffffffffLLU,
74 0x003fffffffffffffLLU, 0x007fffffffffffffLLU, 0x00ffffffffffffffLLU,
75 0x01ffffffffffffffLLU, 0x03ffffffffffffffLLU, 0x07ffffffffffffffLLU,
76 0x0fffffffffffffffLLU, 0x1fffffffffffffffLLU, 0x3fffffffffffffffLLU,
77 0x7fffffffffffffffLLU, 0xffffffffffffffffLLU,
78 };
79 return x & mtable[nbits];
80}
81
82// Mask the NBITS lsb of X. 0 <= NBITS < 32
83static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) {
84 static const uint32_t mtable[33] = {
85 0x0000000000000000U, 0x0000000000000001U, 0x0000000000000003U,
86 0x0000000000000007U, 0x000000000000000fU, 0x000000000000001fU,
87 0x000000000000003fU, 0x000000000000007fU, 0x00000000000000ffU,
88 0x00000000000001ffU, 0x00000000000003ffU, 0x00000000000007ffU,
89 0x0000000000000fffU, 0x0000000000001fffU, 0x0000000000003fffU,
90 0x0000000000007fffU, 0x000000000000ffffU, 0x000000000001ffffU,
91 0x000000000003ffffU, 0x000000000007ffffU, 0x00000000000fffffU,
92 0x00000000001fffffU, 0x00000000003fffffU, 0x00000000007fffffU,
93 0x0000000000ffffffU, 0x0000000001ffffffU, 0x0000000003ffffffU,
94 0x0000000007ffffffU, 0x000000000fffffffU, 0x000000001fffffffU,
95 0x000000003fffffffU, 0x000000007fffffffU, 0x00000000ffffffffU,
96 };
97 return x & mtable[nbits];
98}
99
100/*! @abstract Select \c nbits at index \c start from \c x.
101 * 0 <= start <= start+nbits <= 64 */
102FSE_INLINE uint64_t fse_extract_bits64(uint64_t x, fse_bit_count start,
103 fse_bit_count nbits) {
104 // If START and NBITS are constants, map to bit-field extraction instructions
105 if (__builtin_constant_p(start) && __builtin_constant_p(nbits))
106 return (x >> start) & ((1LLU << nbits) - 1LLU);
107
108 // Otherwise, shift and mask
109 return fse_mask_lsb64(x >> start, nbits);
110}
111
112/*! @abstract Select \c nbits at index \c start from \c x.
113 * 0 <= start <= start+nbits <= 32 */
114FSE_INLINE uint32_t fse_extract_bits32(uint32_t x, fse_bit_count start,
115 fse_bit_count nbits) {
116 // If START and NBITS are constants, map to bit-field extraction instructions
117 if (__builtin_constant_p(start) && __builtin_constant_p(nbits))
118 return (x >> start) & ((1U << nbits) - 1U);
119
120 // Otherwise, shift and mask
121 return fse_mask_lsb32(x >> start, nbits);
122}
123
124#pragma mark - Bit stream
125
126// I/O streams
127// The streams can be shared between several FSE encoders/decoders, which is why
128// they are not in the state struct
129
130/*! @abstract Output stream, 64-bit accum. */
131typedef struct {
132 uint64_t accum; // Output bits
133 fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
134} fse_out_stream64;
135
136/*! @abstract Output stream, 32-bit accum. */
137typedef struct {
138 uint32_t accum; // Output bits
139 fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
140} fse_out_stream32;
141
142/*! @abstract Object representing an input stream. */
143typedef struct {
144 uint64_t accum; // Input bits
145 fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
146} fse_in_stream64;
147
148/*! @abstract Object representing an input stream. */
149typedef struct {
150 uint32_t accum; // Input bits
151 fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
152} fse_in_stream32;
153
154/*! @abstract Initialize an output stream object. */
155FSE_INLINE void fse_out_init64(fse_out_stream64 *s) {
156 s->accum = 0;
157 s->accum_nbits = 0;
158}
159
160/*! @abstract Initialize an output stream object. */
161FSE_INLINE void fse_out_init32(fse_out_stream32 *s) {
162 s->accum = 0;
163 s->accum_nbits = 0;
164}
165
166/*! @abstract Write full bytes from the accumulator to output buffer, ensuring
167 * accum_nbits is in [0, 7].
168 * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all
169 * cases.
170 * @note *pbuf is incremented by the number of written bytes. */
171FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) {
172 fse_bit_count nbits =
173 s->accum_nbits & -8; // number of bits written, multiple of 8
174
175 // Write 8 bytes of current accumulator
176 memcpy(*pbuf, &(s->accum), 8);
177 *pbuf += (nbits >> 3); // bytes
178
179 // Update state
180 s->accum >>= nbits; // remove nbits
181 s->accum_nbits -= nbits;
182
183 assert(s->accum_nbits >= 0 && s->accum_nbits <= 7);
184 assert(s->accum_nbits == 64 || (s->accum >> s->accum_nbits) == 0);
185}
186
187/*! @abstract Write full bytes from the accumulator to output buffer, ensuring
188 * accum_nbits is in [0, 7].
189 * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all
190 * cases.
191 * @note *pbuf is incremented by the number of written bytes. */
192FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) {
193 fse_bit_count nbits =
194 s->accum_nbits & -8; // number of bits written, multiple of 8
195
196 // Write 4 bytes of current accumulator
197 memcpy(*pbuf, &(s->accum), 4);
198 *pbuf += (nbits >> 3); // bytes
199
200 // Update state
201 s->accum >>= nbits; // remove nbits
202 s->accum_nbits -= nbits;
203
204 assert(s->accum_nbits >= 0 && s->accum_nbits <= 7);
205 assert(s->accum_nbits == 32 || (s->accum >> s->accum_nbits) == 0);
206}
207
208/*! @abstract Write the last bytes from the accumulator to output buffer,
209 * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed.
210 * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all
211 * cases.
212 * @note *pbuf is incremented by the number of written bytes. */
213FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) {
214 fse_bit_count nbits =
215 (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8
216
217 // Write 8 bytes of current accumulator
218 memcpy(*pbuf, &(s->accum), 8);
219 *pbuf += (nbits >> 3); // bytes
220
221 // Update state
222 s->accum = 0; // remove nbits
223 s->accum_nbits -= nbits;
224
225 assert(s->accum_nbits >= -7 && s->accum_nbits <= 0);
226}
227
228/*! @abstract Write the last bytes from the accumulator to output buffer,
229 * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed.
230 * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all
231 * cases.
232 * @note *pbuf is incremented by the number of written bytes. */
233FSE_INLINE void fse_out_finish32(fse_out_stream32 *s, uint8_t **pbuf) {
234 fse_bit_count nbits =
235 (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8
236
237 // Write 8 bytes of current accumulator
238 memcpy(*pbuf, &(s->accum), 4);
239 *pbuf += (nbits >> 3); // bytes
240
241 // Update state
242 s->accum = 0; // remove nbits
243 s->accum_nbits -= nbits;
244
245 assert(s->accum_nbits >= -7 && s->accum_nbits <= 0);
246}
247
248/*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have:
249 * 0 <= b < 2^n, and N + s->accum_nbits <= 64.
250 * @note The caller must ensure out_flush is called \b before the accumulator
251 * overflows to more than 64 bits. */
252FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n,
253 uint64_t b) {
254 s->accum |= b << s->accum_nbits;
255 s->accum_nbits += n;
256
257 assert(s->accum_nbits >= 0 && s->accum_nbits <= 64);
258 assert(s->accum_nbits == 64 || (s->accum >> s->accum_nbits) == 0);
259}
260
261/*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have:
262 * 0 <= n < 2^n, and n + s->accum_nbits <= 32.
263 * @note The caller must ensure out_flush is called \b before the accumulator
264 * overflows to more than 32 bits. */
265FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n,
266 uint32_t b) {
267 s->accum |= b << s->accum_nbits;
268 s->accum_nbits += n;
269
270 assert(s->accum_nbits >= 0 && s->accum_nbits <= 32);
271 assert(s->accum_nbits == 32 || (s->accum >> s->accum_nbits) == 0);
272}
273
274#if FSE_IOSTREAM_64
275#define DEBUG_CHECK_INPUT_STREAM_PARAMETERS \
276 assert(s->accum_nbits >= 56 && s->accum_nbits < 64); \
277 assert((s->accum >> s->accum_nbits) == 0);
278#else
279#define DEBUG_CHECK_INPUT_STREAM_PARAMETERS \
280 assert(s->accum_nbits >= 24 && s->accum_nbits < 32); \
281 assert((s->accum >> s->accum_nbits) == 0);
282#endif
283
284/*! @abstract Initialize the fse input stream so that accum holds between 56
285 * and 63 bits. We never want to have 64 bits in the stream, because that allows
286 * us to avoid a special case in the fse_in_pull function (eliminating an
287 * unpredictable branch), while not requiring any additional fse_flush
288 * operations. This is why we have the special case for n == 0 (in which case
289 * we want to load only 7 bytes instead of 8). */
290FSE_INLINE int fse_in_checked_init64(fse_in_stream64 *s, fse_bit_count n,
291 const uint8_t **pbuf,
292 const uint8_t *buf_start) {
293 if (n) {
294 if (*pbuf < buf_start + 8)
295 return -1; // out of range
296 *pbuf -= 8;
297 memcpy(&(s->accum), *pbuf, 8);
298 s->accum_nbits = n + 64;
299 } else {
300 if (*pbuf < buf_start + 7)
301 return -1; // out of range
302 *pbuf -= 7;
303 memcpy(&(s->accum), *pbuf, 7);
304 s->accum &= 0xffffffffffffff;
305 s->accum_nbits = n + 56;
306 }
307
308 if ((s->accum_nbits < 56 || s->accum_nbits >= 64) ||
309 ((s->accum >> s->accum_nbits) != 0)) {
310 return -1; // the incoming input is wrong (encoder should have zeroed the
311 // upper bits)
312 }
313
314 return 0; // OK
315}
316
317/*! @abstract Identical to previous function, but for 32-bit operation
318 * (resulting bit count is between 24 and 31 bits). */
319FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n,
320 const uint8_t **pbuf,
321 const uint8_t *buf_start) {
322 if (n) {
323 if (*pbuf < buf_start + 4)
324 return -1; // out of range
325 *pbuf -= 4;
326 memcpy(&(s->accum), *pbuf, 4);
327 s->accum_nbits = n + 32;
328 } else {
329 if (*pbuf < buf_start + 3)
330 return -1; // out of range
331 *pbuf -= 3;
332 memcpy(&(s->accum), *pbuf, 3);
333 s->accum &= 0xffffff;
334 s->accum_nbits = n + 24;
335 }
336
337 if ((s->accum_nbits < 24 || s->accum_nbits >= 32) ||
338 ((s->accum >> s->accum_nbits) != 0)) {
339 return -1; // the incoming input is wrong (encoder should have zeroed the
340 // upper bits)
341 }
342
343 return 0; // OK
344}
345
346/*! @abstract Read in new bytes from buffer to ensure that we have a full
347 * complement of bits in the stream object (again, between 56 and 63 bits).
348 * checking the new value of \c *pbuf remains >= \c buf_start.
349 * @return 0 if OK.
350 * @return -1 on failure. */
351FSE_INLINE int fse_in_checked_flush64(fse_in_stream64 *s, const uint8_t **pbuf,
352 const uint8_t *buf_start) {
353 // Get number of bits to add to bring us into the desired range.
354 fse_bit_count nbits = (63 - s->accum_nbits) & -8;
355 // Convert bits to bytes and decrement buffer address, then load new data.
356 const uint8_t *buf = (*pbuf) - (nbits >> 3);
357 if (buf < buf_start) {
358 return -1; // out of range
359 }
360 *pbuf = buf;
361 uint64_t incoming;
362 memcpy(&incoming, buf, 8);
363 // Update the state object and verify its validity (in DEBUG).
364 s->accum = (s->accum << nbits) | fse_mask_lsb64(incoming, nbits);
365 s->accum_nbits += nbits;
366 DEBUG_CHECK_INPUT_STREAM_PARAMETERS
367 return 0; // OK
368}
369
370/*! @abstract Identical to previous function (but again, we're only filling
371 * a 32-bit field with between 24 and 31 bits). */
372FSE_INLINE int fse_in_checked_flush32(fse_in_stream32 *s, const uint8_t **pbuf,
373 const uint8_t *buf_start) {
374 // Get number of bits to add to bring us into the desired range.
375 fse_bit_count nbits = (31 - s->accum_nbits) & -8;
376
377 if (nbits > 0) {
378 // Convert bits to bytes and decrement buffer address, then load new data.
379 const uint8_t *buf = (*pbuf) - (nbits >> 3);
380 if (buf < buf_start) {
381 return -1; // out of range
382 }
383
384 *pbuf = buf;
385
386 uint32_t incoming = *((uint32_t *)buf);
387
388 // Update the state object and verify its validity (in DEBUG).
389 s->accum = (s->accum << nbits) | fse_mask_lsb32(incoming, nbits);
390 s->accum_nbits += nbits;
391 }
392 DEBUG_CHECK_INPUT_STREAM_PARAMETERS
393 return 0; // OK
394}
395
396/*! @abstract Pull n bits out of the fse stream object. */
397FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) {
398 assert(n >= 0 && n <= s->accum_nbits);
399 s->accum_nbits -= n;
400 uint64_t result = s->accum >> s->accum_nbits;
401 s->accum = fse_mask_lsb64(s->accum, s->accum_nbits);
402 return result;
403}
404
405/*! @abstract Pull n bits out of the fse stream object. */
406FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) {
407 assert(n >= 0 && n <= s->accum_nbits);
408 s->accum_nbits -= n;
409 uint32_t result = s->accum >> s->accum_nbits;
410 s->accum = fse_mask_lsb32(s->accum, s->accum_nbits);
411 return result;
412}
413
414#pragma mark - Encode/Decode
415
416// Map to 32/64-bit implementations and types for I/O
417#if FSE_IOSTREAM_64
418
419typedef uint64_t fse_bits;
420typedef fse_out_stream64 fse_out_stream;
421typedef fse_in_stream64 fse_in_stream;
422#define fse_mask_lsb fse_mask_lsb64
423#define fse_extract_bits fse_extract_bits64
424#define fse_out_init fse_out_init64
425#define fse_out_flush fse_out_flush64
426#define fse_out_finish fse_out_finish64
427#define fse_out_push fse_out_push64
428#define fse_in_init fse_in_checked_init64
429#define fse_in_checked_init fse_in_checked_init64
430#define fse_in_flush fse_in_checked_flush64
431#define fse_in_checked_flush fse_in_checked_flush64
432#define fse_in_flush2(_unused, _parameters, _unused2) 0 /* nothing */
433#define fse_in_checked_flush2(_unused, _parameters) /* nothing */
434#define fse_in_pull fse_in_pull64
435
436#else
437
438typedef uint32_t fse_bits;
439typedef fse_out_stream32 fse_out_stream;
440typedef fse_in_stream32 fse_in_stream;
441#define fse_mask_lsb fse_mask_lsb32
442#define fse_extract_bits fse_extract_bits32
443#define fse_out_init fse_out_init32
444#define fse_out_flush fse_out_flush32
445#define fse_out_finish fse_out_finish32
446#define fse_out_push fse_out_push32
447#define fse_in_init fse_in_checked_init32
448#define fse_in_checked_init fse_in_checked_init32
449#define fse_in_flush fse_in_checked_flush32
450#define fse_in_checked_flush fse_in_checked_flush32
451#define fse_in_flush2 fse_in_checked_flush32
452#define fse_in_checked_flush2 fse_in_checked_flush32
453#define fse_in_pull fse_in_pull32
454
455#endif
456
457/*! @abstract Entry for one symbol in the encoder table (64b). */
458typedef struct {
459 int16_t s0; // First state requiring a K-bit shift
460 int16_t k; // States S >= S0 are shifted K bits. States S < S0 are
461 // shifted K-1 bits
462 int16_t delta0; // Relative increment used to compute next state if S >= S0
463 int16_t delta1; // Relative increment used to compute next state if S < S0
464} fse_encoder_entry;
465
466/*! @abstract Entry for one state in the decoder table (32b). */
467typedef struct { // DO NOT REORDER THE FIELDS
468 int8_t k; // Number of bits to read
469 uint8_t symbol; // Emitted symbol
470 int16_t delta; // Signed increment used to compute next state (+bias)
471} fse_decoder_entry;
472
473/*! @abstract Entry for one state in the value decoder table (64b). */
474typedef struct { // DO NOT REORDER THE FIELDS
475 uint8_t total_bits; // state bits + extra value bits = shift for next decode
476 uint8_t value_bits; // extra value bits
477 int16_t delta; // state base (delta)
478 int32_t vbase; // value base
479} fse_value_decoder_entry;
480
481/*! @abstract Encode SYMBOL using the encoder table, and update \c *pstate,
482 * \c out.
483 * @note The caller must ensure we have enough bits available in the output
484 * stream accumulator. */
485FSE_INLINE void fse_encode(fse_state *__restrict pstate,
486 const fse_encoder_entry *__restrict encoder_table,
487 fse_out_stream *__restrict out, uint8_t symbol) {
488 int s = *pstate;
489 fse_encoder_entry e = encoder_table[symbol];
490 int s0 = e.s0;
491 int k = e.k;
492 int delta0 = e.delta0;
493 int delta1 = e.delta1;
494
495 // Number of bits to write
496 int hi = s >= s0;
497 fse_bit_count nbits = hi ? k : (k - 1);
498 fse_state delta = hi ? delta0 : delta1;
499
500 // Write lower NBITS of state
501 fse_bits b = fse_mask_lsb(s, nbits);
502 fse_out_push(out, nbits, b);
503
504 // Update state with remaining bits and delta
505 *pstate = delta + (s >> nbits);
506}
507
508/*! @abstract Decode and return symbol using the decoder table, and update
509 * \c *pstate, \c in.
510 * @note The caller must ensure we have enough bits available in the input
511 * stream accumulator. */
512FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate,
513 const int32_t *__restrict decoder_table,
514 fse_in_stream *__restrict in) {
515 int32_t e = decoder_table[*pstate];
516
517 // Update state from K bits of input + DELTA
518 *pstate = (fse_state)(e >> 16) + (fse_state)fse_in_pull(in, e & 0xff);
519
520 // Return the symbol for this state
521 return fse_extract_bits(e, 8, 8); // symbol
522}
523
524/*! @abstract Decode and return value using the decoder table, and update \c
525 * *pstate, \c in.
526 * \c value_decoder_table[nstates]
527 * @note The caller must ensure we have enough bits available in the input
528 * stream accumulator. */
529FSE_INLINE int32_t
530fse_value_decode(fse_state *__restrict pstate,
531 const fse_value_decoder_entry *value_decoder_table,
532 fse_in_stream *__restrict in) {
533 fse_value_decoder_entry entry = value_decoder_table[*pstate];
534 uint32_t state_and_value_bits = (uint32_t)fse_in_pull(in, entry.total_bits);
535 *pstate =
536 (fse_state)(entry.delta + (state_and_value_bits >> entry.value_bits));
537 return (int32_t)(entry.vbase +
538 fse_mask_lsb(state_and_value_bits, entry.value_bits));
539}
540
541#pragma mark - Tables
542
543// IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder
544// tables shall be initialized with the same parameters, including the
545// FREQ[NSYMBOL] array.
546//
547
548/*! @abstract Sanity check on frequency table, verify sum of \c freq
549 * is <= \c number_of_states. */
550FSE_INLINE int fse_check_freq(const uint16_t *freq_table,
551 const size_t table_size,
552 const size_t number_of_states) {
553 size_t sum_of_freq = 0;
554 for (int i = 0; i < table_size; i++) {
555 sum_of_freq += freq_table[i];
556 }
557 return (sum_of_freq > number_of_states) ? -1 : 0;
558}
559
560/*! @abstract Initialize encoder table \c t[nsymbols].
561 *
562 * @param nstates
563 * sum \c freq[i]; the number of states (a power of 2).
564 *
565 * @param nsymbols
566 * the number of symbols.
567 *
568 * @param freq[nsymbols]
569 * is a normalized histogram of symbol frequencies, with \c freq[i] >= 0.
570 * Some symbols may have a 0 frequency. In that case they should not be
571 * present in the data.
572 */
573void fse_init_encoder_table(int nstates, int nsymbols,
574 const uint16_t *__restrict freq,
575 fse_encoder_entry *__restrict t);
576
577/*! @abstract Initialize decoder table \c t[nstates].
578 *
579 * @param nstates
580 * sum \c freq[i]; the number of states (a power of 2).
581 *
582 * @param nsymbols
583 * the number of symbols.
584 *
585 * @param feq[nsymbols]
586 * a normalized histogram of symbol frequencies, with \c freq[i] >= 0.
587 * Some symbols may have a 0 frequency. In that case they should not be
588 * present in the data.
589 *
590 * @return 0 if OK.
591 * @return -1 on failure.
592 */
593int fse_init_decoder_table(int nstates, int nsymbols,
594 const uint16_t *__restrict freq,
595 int32_t *__restrict t);
596
597/*! @abstract Initialize value decoder table \c t[nstates].
598 *
599 * @param nstates
600 * sum \cfreq[i]; the number of states (a power of 2).
601 *
602 * @param nsymbols
603 * the number of symbols.
604 *
605 * @param freq[nsymbols]
606 * a normalized histogram of symbol frequencies, with \c freq[i] >= 0.
607 * \c symbol_vbits[nsymbols] and \c symbol_vbase[nsymbols] are the number of
608 * value bits to read and the base value for each symbol.
609 * Some symbols may have a 0 frequency. In that case they should not be
610 * present in the data.
611 */
612void fse_init_value_decoder_table(int nstates, int nsymbols,
613 const uint16_t *__restrict freq,
614 const uint8_t *__restrict symbol_vbits,
615 const int32_t *__restrict symbol_vbase,
616 fse_value_decoder_entry *__restrict t);
617
618/*! @abstract Normalize a table \c t[nsymbols] of occurrences to
619 * \c freq[nsymbols]. */
620void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t,
621 uint16_t *__restrict freq);
622
623
624