1 | /* |
2 | Copyright (c) 2015-2016, Apple Inc. All rights reserved. |
3 | |
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: |
5 | |
6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. |
7 | |
8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer |
9 | in the documentation and/or other materials provided with the distribution. |
10 | |
11 | 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived |
12 | from this software without specific prior written permission. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
16 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
18 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
19 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
20 | */ |
21 | |
22 | // Finite state entropy coding (FSE) |
23 | // This is an implementation of the tANS algorithm described by Jarek Duda, |
24 | // we use the more descriptive name "Finite State Entropy". |
25 | |
26 | #pragma once |
27 | |
28 | #include <assert.h> |
29 | #include <stddef.h> |
30 | #include <stdint.h> |
31 | #include <stdlib.h> |
32 | #include <string.h> |
33 | |
34 | // Select between 32/64-bit I/O streams for FSE. Note that the FSE stream |
35 | // size need not match the word size of the machine, but in practice you |
36 | // want to use 64b streams on 64b systems for better performance. |
37 | #if defined __x86_64__ || defined __arm64__ |
38 | #define FSE_IOSTREAM_64 1 |
39 | #else |
40 | #define FSE_IOSTREAM_64 0 |
41 | #endif |
42 | |
43 | #define FSE_INLINE static inline __attribute__((__always_inline__)) |
44 | |
45 | #pragma mark - Bit utils |
46 | |
47 | /*! @abstract Signed type used to represent bit count. */ |
48 | typedef int32_t fse_bit_count; |
49 | |
50 | /*! @abstract Unsigned type used to represent FSE state. */ |
51 | typedef uint16_t fse_state; |
52 | |
53 | // Mask the NBITS lsb of X. 0 <= NBITS < 64 |
54 | static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) { |
55 | static const uint64_t mtable[65] = { |
56 | 0x0000000000000000LLU, 0x0000000000000001LLU, 0x0000000000000003LLU, |
57 | 0x0000000000000007LLU, 0x000000000000000fLLU, 0x000000000000001fLLU, |
58 | 0x000000000000003fLLU, 0x000000000000007fLLU, 0x00000000000000ffLLU, |
59 | 0x00000000000001ffLLU, 0x00000000000003ffLLU, 0x00000000000007ffLLU, |
60 | 0x0000000000000fffLLU, 0x0000000000001fffLLU, 0x0000000000003fffLLU, |
61 | 0x0000000000007fffLLU, 0x000000000000ffffLLU, 0x000000000001ffffLLU, |
62 | 0x000000000003ffffLLU, 0x000000000007ffffLLU, 0x00000000000fffffLLU, |
63 | 0x00000000001fffffLLU, 0x00000000003fffffLLU, 0x00000000007fffffLLU, |
64 | 0x0000000000ffffffLLU, 0x0000000001ffffffLLU, 0x0000000003ffffffLLU, |
65 | 0x0000000007ffffffLLU, 0x000000000fffffffLLU, 0x000000001fffffffLLU, |
66 | 0x000000003fffffffLLU, 0x000000007fffffffLLU, 0x00000000ffffffffLLU, |
67 | 0x00000001ffffffffLLU, 0x00000003ffffffffLLU, 0x00000007ffffffffLLU, |
68 | 0x0000000fffffffffLLU, 0x0000001fffffffffLLU, 0x0000003fffffffffLLU, |
69 | 0x0000007fffffffffLLU, 0x000000ffffffffffLLU, 0x000001ffffffffffLLU, |
70 | 0x000003ffffffffffLLU, 0x000007ffffffffffLLU, 0x00000fffffffffffLLU, |
71 | 0x00001fffffffffffLLU, 0x00003fffffffffffLLU, 0x00007fffffffffffLLU, |
72 | 0x0000ffffffffffffLLU, 0x0001ffffffffffffLLU, 0x0003ffffffffffffLLU, |
73 | 0x0007ffffffffffffLLU, 0x000fffffffffffffLLU, 0x001fffffffffffffLLU, |
74 | 0x003fffffffffffffLLU, 0x007fffffffffffffLLU, 0x00ffffffffffffffLLU, |
75 | 0x01ffffffffffffffLLU, 0x03ffffffffffffffLLU, 0x07ffffffffffffffLLU, |
76 | 0x0fffffffffffffffLLU, 0x1fffffffffffffffLLU, 0x3fffffffffffffffLLU, |
77 | 0x7fffffffffffffffLLU, 0xffffffffffffffffLLU, |
78 | }; |
79 | return x & mtable[nbits]; |
80 | } |
81 | |
82 | // Mask the NBITS lsb of X. 0 <= NBITS < 32 |
83 | static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) { |
84 | static const uint32_t mtable[33] = { |
85 | 0x0000000000000000U, 0x0000000000000001U, 0x0000000000000003U, |
86 | 0x0000000000000007U, 0x000000000000000fU, 0x000000000000001fU, |
87 | 0x000000000000003fU, 0x000000000000007fU, 0x00000000000000ffU, |
88 | 0x00000000000001ffU, 0x00000000000003ffU, 0x00000000000007ffU, |
89 | 0x0000000000000fffU, 0x0000000000001fffU, 0x0000000000003fffU, |
90 | 0x0000000000007fffU, 0x000000000000ffffU, 0x000000000001ffffU, |
91 | 0x000000000003ffffU, 0x000000000007ffffU, 0x00000000000fffffU, |
92 | 0x00000000001fffffU, 0x00000000003fffffU, 0x00000000007fffffU, |
93 | 0x0000000000ffffffU, 0x0000000001ffffffU, 0x0000000003ffffffU, |
94 | 0x0000000007ffffffU, 0x000000000fffffffU, 0x000000001fffffffU, |
95 | 0x000000003fffffffU, 0x000000007fffffffU, 0x00000000ffffffffU, |
96 | }; |
97 | return x & mtable[nbits]; |
98 | } |
99 | |
100 | /*! @abstract Select \c nbits at index \c start from \c x. |
101 | * 0 <= start <= start+nbits <= 64 */ |
102 | FSE_INLINE uint64_t (uint64_t x, fse_bit_count start, |
103 | fse_bit_count nbits) { |
104 | // If START and NBITS are constants, map to bit-field extraction instructions |
105 | if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) |
106 | return (x >> start) & ((1LLU << nbits) - 1LLU); |
107 | |
108 | // Otherwise, shift and mask |
109 | return fse_mask_lsb64(x >> start, nbits); |
110 | } |
111 | |
112 | /*! @abstract Select \c nbits at index \c start from \c x. |
113 | * 0 <= start <= start+nbits <= 32 */ |
114 | FSE_INLINE uint32_t (uint32_t x, fse_bit_count start, |
115 | fse_bit_count nbits) { |
116 | // If START and NBITS are constants, map to bit-field extraction instructions |
117 | if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) |
118 | return (x >> start) & ((1U << nbits) - 1U); |
119 | |
120 | // Otherwise, shift and mask |
121 | return fse_mask_lsb32(x >> start, nbits); |
122 | } |
123 | |
124 | #pragma mark - Bit stream |
125 | |
126 | // I/O streams |
127 | // The streams can be shared between several FSE encoders/decoders, which is why |
128 | // they are not in the state struct |
129 | |
130 | /*! @abstract Output stream, 64-bit accum. */ |
131 | typedef struct { |
132 | uint64_t accum; // Output bits |
133 | fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 |
134 | } fse_out_stream64; |
135 | |
136 | /*! @abstract Output stream, 32-bit accum. */ |
137 | typedef struct { |
138 | uint32_t accum; // Output bits |
139 | fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 |
140 | } fse_out_stream32; |
141 | |
142 | /*! @abstract Object representing an input stream. */ |
143 | typedef struct { |
144 | uint64_t accum; // Input bits |
145 | fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 |
146 | } fse_in_stream64; |
147 | |
148 | /*! @abstract Object representing an input stream. */ |
149 | typedef struct { |
150 | uint32_t accum; // Input bits |
151 | fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 |
152 | } fse_in_stream32; |
153 | |
154 | /*! @abstract Initialize an output stream object. */ |
155 | FSE_INLINE void fse_out_init64(fse_out_stream64 *s) { |
156 | s->accum = 0; |
157 | s->accum_nbits = 0; |
158 | } |
159 | |
160 | /*! @abstract Initialize an output stream object. */ |
161 | FSE_INLINE void fse_out_init32(fse_out_stream32 *s) { |
162 | s->accum = 0; |
163 | s->accum_nbits = 0; |
164 | } |
165 | |
166 | /*! @abstract Write full bytes from the accumulator to output buffer, ensuring |
167 | * accum_nbits is in [0, 7]. |
168 | * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all |
169 | * cases. |
170 | * @note *pbuf is incremented by the number of written bytes. */ |
171 | FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) { |
172 | fse_bit_count nbits = |
173 | s->accum_nbits & -8; // number of bits written, multiple of 8 |
174 | |
175 | // Write 8 bytes of current accumulator |
176 | memcpy(*pbuf, &(s->accum), 8); |
177 | *pbuf += (nbits >> 3); // bytes |
178 | |
179 | // Update state |
180 | s->accum >>= nbits; // remove nbits |
181 | s->accum_nbits -= nbits; |
182 | |
183 | assert(s->accum_nbits >= 0 && s->accum_nbits <= 7); |
184 | assert(s->accum_nbits == 64 || (s->accum >> s->accum_nbits) == 0); |
185 | } |
186 | |
187 | /*! @abstract Write full bytes from the accumulator to output buffer, ensuring |
188 | * accum_nbits is in [0, 7]. |
189 | * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all |
190 | * cases. |
191 | * @note *pbuf is incremented by the number of written bytes. */ |
192 | FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) { |
193 | fse_bit_count nbits = |
194 | s->accum_nbits & -8; // number of bits written, multiple of 8 |
195 | |
196 | // Write 4 bytes of current accumulator |
197 | memcpy(*pbuf, &(s->accum), 4); |
198 | *pbuf += (nbits >> 3); // bytes |
199 | |
200 | // Update state |
201 | s->accum >>= nbits; // remove nbits |
202 | s->accum_nbits -= nbits; |
203 | |
204 | assert(s->accum_nbits >= 0 && s->accum_nbits <= 7); |
205 | assert(s->accum_nbits == 32 || (s->accum >> s->accum_nbits) == 0); |
206 | } |
207 | |
208 | /*! @abstract Write the last bytes from the accumulator to output buffer, |
209 | * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed. |
210 | * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all |
211 | * cases. |
212 | * @note *pbuf is incremented by the number of written bytes. */ |
213 | FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) { |
214 | fse_bit_count nbits = |
215 | (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8 |
216 | |
217 | // Write 8 bytes of current accumulator |
218 | memcpy(*pbuf, &(s->accum), 8); |
219 | *pbuf += (nbits >> 3); // bytes |
220 | |
221 | // Update state |
222 | s->accum = 0; // remove nbits |
223 | s->accum_nbits -= nbits; |
224 | |
225 | assert(s->accum_nbits >= -7 && s->accum_nbits <= 0); |
226 | } |
227 | |
228 | /*! @abstract Write the last bytes from the accumulator to output buffer, |
229 | * ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed. |
230 | * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all |
231 | * cases. |
232 | * @note *pbuf is incremented by the number of written bytes. */ |
233 | FSE_INLINE void fse_out_finish32(fse_out_stream32 *s, uint8_t **pbuf) { |
234 | fse_bit_count nbits = |
235 | (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8 |
236 | |
237 | // Write 8 bytes of current accumulator |
238 | memcpy(*pbuf, &(s->accum), 4); |
239 | *pbuf += (nbits >> 3); // bytes |
240 | |
241 | // Update state |
242 | s->accum = 0; // remove nbits |
243 | s->accum_nbits -= nbits; |
244 | |
245 | assert(s->accum_nbits >= -7 && s->accum_nbits <= 0); |
246 | } |
247 | |
248 | /*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: |
249 | * 0 <= b < 2^n, and N + s->accum_nbits <= 64. |
250 | * @note The caller must ensure out_flush is called \b before the accumulator |
251 | * overflows to more than 64 bits. */ |
252 | FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n, |
253 | uint64_t b) { |
254 | s->accum |= b << s->accum_nbits; |
255 | s->accum_nbits += n; |
256 | |
257 | assert(s->accum_nbits >= 0 && s->accum_nbits <= 64); |
258 | assert(s->accum_nbits == 64 || (s->accum >> s->accum_nbits) == 0); |
259 | } |
260 | |
261 | /*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: |
262 | * 0 <= n < 2^n, and n + s->accum_nbits <= 32. |
263 | * @note The caller must ensure out_flush is called \b before the accumulator |
264 | * overflows to more than 32 bits. */ |
265 | FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n, |
266 | uint32_t b) { |
267 | s->accum |= b << s->accum_nbits; |
268 | s->accum_nbits += n; |
269 | |
270 | assert(s->accum_nbits >= 0 && s->accum_nbits <= 32); |
271 | assert(s->accum_nbits == 32 || (s->accum >> s->accum_nbits) == 0); |
272 | } |
273 | |
274 | #if FSE_IOSTREAM_64 |
275 | #define DEBUG_CHECK_INPUT_STREAM_PARAMETERS \ |
276 | assert(s->accum_nbits >= 56 && s->accum_nbits < 64); \ |
277 | assert((s->accum >> s->accum_nbits) == 0); |
278 | #else |
279 | #define DEBUG_CHECK_INPUT_STREAM_PARAMETERS \ |
280 | assert(s->accum_nbits >= 24 && s->accum_nbits < 32); \ |
281 | assert((s->accum >> s->accum_nbits) == 0); |
282 | #endif |
283 | |
284 | /*! @abstract Initialize the fse input stream so that accum holds between 56 |
285 | * and 63 bits. We never want to have 64 bits in the stream, because that allows |
286 | * us to avoid a special case in the fse_in_pull function (eliminating an |
287 | * unpredictable branch), while not requiring any additional fse_flush |
288 | * operations. This is why we have the special case for n == 0 (in which case |
289 | * we want to load only 7 bytes instead of 8). */ |
290 | FSE_INLINE int fse_in_checked_init64(fse_in_stream64 *s, fse_bit_count n, |
291 | const uint8_t **pbuf, |
292 | const uint8_t *buf_start) { |
293 | if (n) { |
294 | if (*pbuf < buf_start + 8) |
295 | return -1; // out of range |
296 | *pbuf -= 8; |
297 | memcpy(&(s->accum), *pbuf, 8); |
298 | s->accum_nbits = n + 64; |
299 | } else { |
300 | if (*pbuf < buf_start + 7) |
301 | return -1; // out of range |
302 | *pbuf -= 7; |
303 | memcpy(&(s->accum), *pbuf, 7); |
304 | s->accum &= 0xffffffffffffff; |
305 | s->accum_nbits = n + 56; |
306 | } |
307 | |
308 | if ((s->accum_nbits < 56 || s->accum_nbits >= 64) || |
309 | ((s->accum >> s->accum_nbits) != 0)) { |
310 | return -1; // the incoming input is wrong (encoder should have zeroed the |
311 | // upper bits) |
312 | } |
313 | |
314 | return 0; // OK |
315 | } |
316 | |
317 | /*! @abstract Identical to previous function, but for 32-bit operation |
318 | * (resulting bit count is between 24 and 31 bits). */ |
319 | FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n, |
320 | const uint8_t **pbuf, |
321 | const uint8_t *buf_start) { |
322 | if (n) { |
323 | if (*pbuf < buf_start + 4) |
324 | return -1; // out of range |
325 | *pbuf -= 4; |
326 | memcpy(&(s->accum), *pbuf, 4); |
327 | s->accum_nbits = n + 32; |
328 | } else { |
329 | if (*pbuf < buf_start + 3) |
330 | return -1; // out of range |
331 | *pbuf -= 3; |
332 | memcpy(&(s->accum), *pbuf, 3); |
333 | s->accum &= 0xffffff; |
334 | s->accum_nbits = n + 24; |
335 | } |
336 | |
337 | if ((s->accum_nbits < 24 || s->accum_nbits >= 32) || |
338 | ((s->accum >> s->accum_nbits) != 0)) { |
339 | return -1; // the incoming input is wrong (encoder should have zeroed the |
340 | // upper bits) |
341 | } |
342 | |
343 | return 0; // OK |
344 | } |
345 | |
346 | /*! @abstract Read in new bytes from buffer to ensure that we have a full |
347 | * complement of bits in the stream object (again, between 56 and 63 bits). |
348 | * checking the new value of \c *pbuf remains >= \c buf_start. |
349 | * @return 0 if OK. |
350 | * @return -1 on failure. */ |
351 | FSE_INLINE int fse_in_checked_flush64(fse_in_stream64 *s, const uint8_t **pbuf, |
352 | const uint8_t *buf_start) { |
353 | // Get number of bits to add to bring us into the desired range. |
354 | fse_bit_count nbits = (63 - s->accum_nbits) & -8; |
355 | // Convert bits to bytes and decrement buffer address, then load new data. |
356 | const uint8_t *buf = (*pbuf) - (nbits >> 3); |
357 | if (buf < buf_start) { |
358 | return -1; // out of range |
359 | } |
360 | *pbuf = buf; |
361 | uint64_t incoming; |
362 | memcpy(&incoming, buf, 8); |
363 | // Update the state object and verify its validity (in DEBUG). |
364 | s->accum = (s->accum << nbits) | fse_mask_lsb64(incoming, nbits); |
365 | s->accum_nbits += nbits; |
366 | DEBUG_CHECK_INPUT_STREAM_PARAMETERS |
367 | return 0; // OK |
368 | } |
369 | |
370 | /*! @abstract Identical to previous function (but again, we're only filling |
371 | * a 32-bit field with between 24 and 31 bits). */ |
372 | FSE_INLINE int fse_in_checked_flush32(fse_in_stream32 *s, const uint8_t **pbuf, |
373 | const uint8_t *buf_start) { |
374 | // Get number of bits to add to bring us into the desired range. |
375 | fse_bit_count nbits = (31 - s->accum_nbits) & -8; |
376 | |
377 | if (nbits > 0) { |
378 | // Convert bits to bytes and decrement buffer address, then load new data. |
379 | const uint8_t *buf = (*pbuf) - (nbits >> 3); |
380 | if (buf < buf_start) { |
381 | return -1; // out of range |
382 | } |
383 | |
384 | *pbuf = buf; |
385 | |
386 | uint32_t incoming = *((uint32_t *)buf); |
387 | |
388 | // Update the state object and verify its validity (in DEBUG). |
389 | s->accum = (s->accum << nbits) | fse_mask_lsb32(incoming, nbits); |
390 | s->accum_nbits += nbits; |
391 | } |
392 | DEBUG_CHECK_INPUT_STREAM_PARAMETERS |
393 | return 0; // OK |
394 | } |
395 | |
396 | /*! @abstract Pull n bits out of the fse stream object. */ |
397 | FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) { |
398 | assert(n >= 0 && n <= s->accum_nbits); |
399 | s->accum_nbits -= n; |
400 | uint64_t result = s->accum >> s->accum_nbits; |
401 | s->accum = fse_mask_lsb64(s->accum, s->accum_nbits); |
402 | return result; |
403 | } |
404 | |
405 | /*! @abstract Pull n bits out of the fse stream object. */ |
406 | FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) { |
407 | assert(n >= 0 && n <= s->accum_nbits); |
408 | s->accum_nbits -= n; |
409 | uint32_t result = s->accum >> s->accum_nbits; |
410 | s->accum = fse_mask_lsb32(s->accum, s->accum_nbits); |
411 | return result; |
412 | } |
413 | |
414 | #pragma mark - Encode/Decode |
415 | |
416 | // Map to 32/64-bit implementations and types for I/O |
417 | #if FSE_IOSTREAM_64 |
418 | |
419 | typedef uint64_t fse_bits; |
420 | typedef fse_out_stream64 fse_out_stream; |
421 | typedef fse_in_stream64 fse_in_stream; |
422 | #define fse_mask_lsb fse_mask_lsb64 |
423 | #define fse_extract_bits64 |
424 | #define fse_out_init fse_out_init64 |
425 | #define fse_out_flush fse_out_flush64 |
426 | #define fse_out_finish fse_out_finish64 |
427 | #define fse_out_push fse_out_push64 |
428 | #define fse_in_init fse_in_checked_init64 |
429 | #define fse_in_checked_init fse_in_checked_init64 |
430 | #define fse_in_flush fse_in_checked_flush64 |
431 | #define fse_in_checked_flush fse_in_checked_flush64 |
432 | #define fse_in_flush2(_unused, _parameters, _unused2) 0 /* nothing */ |
433 | #define fse_in_checked_flush2(_unused, _parameters) /* nothing */ |
434 | #define fse_in_pull fse_in_pull64 |
435 | |
436 | #else |
437 | |
438 | typedef uint32_t fse_bits; |
439 | typedef fse_out_stream32 fse_out_stream; |
440 | typedef fse_in_stream32 fse_in_stream; |
441 | #define fse_mask_lsb fse_mask_lsb32 |
442 | #define fse_extract_bits fse_extract_bits32 |
443 | #define fse_out_init fse_out_init32 |
444 | #define fse_out_flush fse_out_flush32 |
445 | #define fse_out_finish fse_out_finish32 |
446 | #define fse_out_push fse_out_push32 |
447 | #define fse_in_init fse_in_checked_init32 |
448 | #define fse_in_checked_init fse_in_checked_init32 |
449 | #define fse_in_flush fse_in_checked_flush32 |
450 | #define fse_in_checked_flush fse_in_checked_flush32 |
451 | #define fse_in_flush2 fse_in_checked_flush32 |
452 | #define fse_in_checked_flush2 fse_in_checked_flush32 |
453 | #define fse_in_pull fse_in_pull32 |
454 | |
455 | #endif |
456 | |
457 | /*! @abstract Entry for one symbol in the encoder table (64b). */ |
458 | typedef struct { |
459 | int16_t s0; // First state requiring a K-bit shift |
460 | int16_t k; // States S >= S0 are shifted K bits. States S < S0 are |
461 | // shifted K-1 bits |
462 | int16_t delta0; // Relative increment used to compute next state if S >= S0 |
463 | int16_t delta1; // Relative increment used to compute next state if S < S0 |
464 | } fse_encoder_entry; |
465 | |
466 | /*! @abstract Entry for one state in the decoder table (32b). */ |
467 | typedef struct { // DO NOT REORDER THE FIELDS |
468 | int8_t k; // Number of bits to read |
469 | uint8_t symbol; // Emitted symbol |
470 | int16_t delta; // Signed increment used to compute next state (+bias) |
471 | } fse_decoder_entry; |
472 | |
473 | /*! @abstract Entry for one state in the value decoder table (64b). */ |
474 | typedef struct { // DO NOT REORDER THE FIELDS |
475 | uint8_t total_bits; // state bits + extra value bits = shift for next decode |
476 | uint8_t value_bits; // extra value bits |
477 | int16_t delta; // state base (delta) |
478 | int32_t vbase; // value base |
479 | } fse_value_decoder_entry; |
480 | |
481 | /*! @abstract Encode SYMBOL using the encoder table, and update \c *pstate, |
482 | * \c out. |
483 | * @note The caller must ensure we have enough bits available in the output |
484 | * stream accumulator. */ |
485 | FSE_INLINE void fse_encode(fse_state *__restrict pstate, |
486 | const fse_encoder_entry *__restrict encoder_table, |
487 | fse_out_stream *__restrict out, uint8_t symbol) { |
488 | int s = *pstate; |
489 | fse_encoder_entry e = encoder_table[symbol]; |
490 | int s0 = e.s0; |
491 | int k = e.k; |
492 | int delta0 = e.delta0; |
493 | int delta1 = e.delta1; |
494 | |
495 | // Number of bits to write |
496 | int hi = s >= s0; |
497 | fse_bit_count nbits = hi ? k : (k - 1); |
498 | fse_state delta = hi ? delta0 : delta1; |
499 | |
500 | // Write lower NBITS of state |
501 | fse_bits b = fse_mask_lsb(s, nbits); |
502 | fse_out_push(out, nbits, b); |
503 | |
504 | // Update state with remaining bits and delta |
505 | *pstate = delta + (s >> nbits); |
506 | } |
507 | |
508 | /*! @abstract Decode and return symbol using the decoder table, and update |
509 | * \c *pstate, \c in. |
510 | * @note The caller must ensure we have enough bits available in the input |
511 | * stream accumulator. */ |
512 | FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate, |
513 | const int32_t *__restrict decoder_table, |
514 | fse_in_stream *__restrict in) { |
515 | int32_t e = decoder_table[*pstate]; |
516 | |
517 | // Update state from K bits of input + DELTA |
518 | *pstate = (fse_state)(e >> 16) + (fse_state)fse_in_pull(in, e & 0xff); |
519 | |
520 | // Return the symbol for this state |
521 | return fse_extract_bits(e, 8, 8); // symbol |
522 | } |
523 | |
524 | /*! @abstract Decode and return value using the decoder table, and update \c |
525 | * *pstate, \c in. |
526 | * \c value_decoder_table[nstates] |
527 | * @note The caller must ensure we have enough bits available in the input |
528 | * stream accumulator. */ |
529 | FSE_INLINE int32_t |
530 | fse_value_decode(fse_state *__restrict pstate, |
531 | const fse_value_decoder_entry *value_decoder_table, |
532 | fse_in_stream *__restrict in) { |
533 | fse_value_decoder_entry entry = value_decoder_table[*pstate]; |
534 | uint32_t state_and_value_bits = (uint32_t)fse_in_pull(in, entry.total_bits); |
535 | *pstate = |
536 | (fse_state)(entry.delta + (state_and_value_bits >> entry.value_bits)); |
537 | return (int32_t)(entry.vbase + |
538 | fse_mask_lsb(state_and_value_bits, entry.value_bits)); |
539 | } |
540 | |
541 | #pragma mark - Tables |
542 | |
543 | // IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder |
544 | // tables shall be initialized with the same parameters, including the |
545 | // FREQ[NSYMBOL] array. |
546 | // |
547 | |
548 | /*! @abstract Sanity check on frequency table, verify sum of \c freq |
549 | * is <= \c number_of_states. */ |
550 | FSE_INLINE int fse_check_freq(const uint16_t *freq_table, |
551 | const size_t table_size, |
552 | const size_t number_of_states) { |
553 | size_t sum_of_freq = 0; |
554 | for (int i = 0; i < table_size; i++) { |
555 | sum_of_freq += freq_table[i]; |
556 | } |
557 | return (sum_of_freq > number_of_states) ? -1 : 0; |
558 | } |
559 | |
560 | /*! @abstract Initialize encoder table \c t[nsymbols]. |
561 | * |
562 | * @param nstates |
563 | * sum \c freq[i]; the number of states (a power of 2). |
564 | * |
565 | * @param nsymbols |
566 | * the number of symbols. |
567 | * |
568 | * @param freq[nsymbols] |
569 | * is a normalized histogram of symbol frequencies, with \c freq[i] >= 0. |
570 | * Some symbols may have a 0 frequency. In that case they should not be |
571 | * present in the data. |
572 | */ |
573 | void fse_init_encoder_table(int nstates, int nsymbols, |
574 | const uint16_t *__restrict freq, |
575 | fse_encoder_entry *__restrict t); |
576 | |
577 | /*! @abstract Initialize decoder table \c t[nstates]. |
578 | * |
579 | * @param nstates |
580 | * sum \c freq[i]; the number of states (a power of 2). |
581 | * |
582 | * @param nsymbols |
583 | * the number of symbols. |
584 | * |
585 | * @param feq[nsymbols] |
586 | * a normalized histogram of symbol frequencies, with \c freq[i] >= 0. |
587 | * Some symbols may have a 0 frequency. In that case they should not be |
588 | * present in the data. |
589 | * |
590 | * @return 0 if OK. |
591 | * @return -1 on failure. |
592 | */ |
593 | int fse_init_decoder_table(int nstates, int nsymbols, |
594 | const uint16_t *__restrict freq, |
595 | int32_t *__restrict t); |
596 | |
597 | /*! @abstract Initialize value decoder table \c t[nstates]. |
598 | * |
599 | * @param nstates |
600 | * sum \cfreq[i]; the number of states (a power of 2). |
601 | * |
602 | * @param nsymbols |
603 | * the number of symbols. |
604 | * |
605 | * @param freq[nsymbols] |
606 | * a normalized histogram of symbol frequencies, with \c freq[i] >= 0. |
607 | * \c symbol_vbits[nsymbols] and \c symbol_vbase[nsymbols] are the number of |
608 | * value bits to read and the base value for each symbol. |
609 | * Some symbols may have a 0 frequency. In that case they should not be |
610 | * present in the data. |
611 | */ |
612 | void fse_init_value_decoder_table(int nstates, int nsymbols, |
613 | const uint16_t *__restrict freq, |
614 | const uint8_t *__restrict symbol_vbits, |
615 | const int32_t *__restrict symbol_vbase, |
616 | fse_value_decoder_entry *__restrict t); |
617 | |
618 | /*! @abstract Normalize a table \c t[nsymbols] of occurrences to |
619 | * \c freq[nsymbols]. */ |
620 | void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, |
621 | uint16_t *__restrict freq); |
622 | |
623 | |
624 | |