lzfse_fse.h source code [lzfse/src/lzfse_fse.h]

1	/*
2	Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4	Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6	1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8	2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9	in the documentation and/or other materials provided with the distribution.
10
11	3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12	from this software without specific prior written permission.
13
14	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16	COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18	HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20	*/
21
22	// Finite state entropy coding (FSE)
23	// This is an implementation of the tANS algorithm described by Jarek Duda,
24	// we use the more descriptive name "Finite State Entropy".
25
26	#pragma once
27
28	#include <assert.h>
29	#include <stddef.h>
30	#include <stdint.h>
31	#include <stdlib.h>
32	#include <string.h>
33
34	// Select between 32/64-bit I/O streams for FSE. Note that the FSE stream
35	// size need not match the word size of the machine, but in practice you
36	// want to use 64b streams on 64b systems for better performance.
37	#if defined __x86_64__ \|\| defined __arm64__
38	#define FSE_IOSTREAM_64 1
39	#else
40	#define FSE_IOSTREAM_64 0
41	#endif
42
43	#define FSE_INLINE static inline __attribute__((__always_inline__))
44
45	#pragma mark - Bit utils
46
47	/! @abstract Signed type used to represent bit count. /
48	typedef int32_t fse_bit_count;
49
50	/! @abstract Unsigned type used to represent FSE state. /
51	typedef uint16_t fse_state;
52
53	// Mask the NBITS lsb of X. 0 <= NBITS < 64
54	static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) {
55	static const uint64_t mtable[`65`] = {
56	`0x0000000000000000LLU`, `0x0000000000000001LLU`, `0x0000000000000003LLU`,
57	`0x0000000000000007LLU`, `0x000000000000000fLLU`, `0x000000000000001fLLU`,
58	`0x000000000000003fLLU`, `0x000000000000007fLLU`, `0x00000000000000ffLLU`,
59	`0x00000000000001ffLLU`, `0x00000000000003ffLLU`, `0x00000000000007ffLLU`,
60	`0x0000000000000fffLLU`, `0x0000000000001fffLLU`, `0x0000000000003fffLLU`,
61	`0x0000000000007fffLLU`, `0x000000000000ffffLLU`, `0x000000000001ffffLLU`,
62	`0x000000000003ffffLLU`, `0x000000000007ffffLLU`, `0x00000000000fffffLLU`,
63	`0x00000000001fffffLLU`, `0x00000000003fffffLLU`, `0x00000000007fffffLLU`,
64	`0x0000000000ffffffLLU`, `0x0000000001ffffffLLU`, `0x0000000003ffffffLLU`,
65	`0x0000000007ffffffLLU`, `0x000000000fffffffLLU`, `0x000000001fffffffLLU`,
66	`0x000000003fffffffLLU`, `0x000000007fffffffLLU`, `0x00000000ffffffffLLU`,
67	`0x00000001ffffffffLLU`, `0x00000003ffffffffLLU`, `0x00000007ffffffffLLU`,
68	`0x0000000fffffffffLLU`, `0x0000001fffffffffLLU`, `0x0000003fffffffffLLU`,
69	`0x0000007fffffffffLLU`, `0x000000ffffffffffLLU`, `0x000001ffffffffffLLU`,
70	`0x000003ffffffffffLLU`, `0x000007ffffffffffLLU`, `0x00000fffffffffffLLU`,
71	`0x00001fffffffffffLLU`, `0x00003fffffffffffLLU`, `0x00007fffffffffffLLU`,
72	`0x0000ffffffffffffLLU`, `0x0001ffffffffffffLLU`, `0x0003ffffffffffffLLU`,
73	`0x0007ffffffffffffLLU`, `0x000fffffffffffffLLU`, `0x001fffffffffffffLLU`,
74	`0x003fffffffffffffLLU`, `0x007fffffffffffffLLU`, `0x00ffffffffffffffLLU`,
75	`0x01ffffffffffffffLLU`, `0x03ffffffffffffffLLU`, `0x07ffffffffffffffLLU`,
76	`0x0fffffffffffffffLLU`, `0x1fffffffffffffffLLU`, `0x3fffffffffffffffLLU`,
77	`0x7fffffffffffffffLLU`, `0xffffffffffffffffLLU`,
78	};
79	return x & mtable[nbits];
80	}
81
82	// Mask the NBITS lsb of X. 0 <= NBITS < 32
83	static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) {
84	static const uint32_t mtable[`33`] = {
85	`0x0000000000000000U`, `0x0000000000000001U`, `0x0000000000000003U`,
86	`0x0000000000000007U`, `0x000000000000000fU`, `0x000000000000001fU`,
87	`0x000000000000003fU`, `0x000000000000007fU`, `0x00000000000000ffU`,
88	`0x00000000000001ffU`, `0x00000000000003ffU`, `0x00000000000007ffU`,
89	`0x0000000000000fffU`, `0x0000000000001fffU`, `0x0000000000003fffU`,
90	`0x0000000000007fffU`, `0x000000000000ffffU`, `0x000000000001ffffU`,
91	`0x000000000003ffffU`, `0x000000000007ffffU`, `0x00000000000fffffU`,
92	`0x00000000001fffffU`, `0x00000000003fffffU`, `0x00000000007fffffU`,
93	`0x0000000000ffffffU`, `0x0000000001ffffffU`, `0x0000000003ffffffU`,
94	`0x0000000007ffffffU`, `0x000000000fffffffU`, `0x000000001fffffffU`,
95	`0x000000003fffffffU`, `0x000000007fffffffU`, `0x00000000ffffffffU`,
96	};
97	return x & mtable[nbits];
98	}
99
100	/! @abstract Select \c nbits at index \c start from \c x.*
101	* 0 <= start <= start+nbits <= 64 */
102	FSE_INLINE uint64_t fse_extract_bits64(uint64_t x, fse_bit_count start,
103	fse_bit_count nbits) {
104	// If START and NBITS are constants, map to bit-field extraction instructions
105	if (__builtin_constant_p(start) && __builtin_constant_p(nbits))
106	return (x >> start) & ((`1LLU` << nbits) - `1LLU`);
107
108	// Otherwise, shift and mask
109	return fse_mask_lsb64(x >> start, nbits);
110	}
111
112	/! @abstract Select \c nbits at index \c start from \c x.*
113	* 0 <= start <= start+nbits <= 32 */
114	FSE_INLINE uint32_t fse_extract_bits32(uint32_t x, fse_bit_count start,
115	fse_bit_count nbits) {
116	// If START and NBITS are constants, map to bit-field extraction instructions
117	if (__builtin_constant_p(start) && __builtin_constant_p(nbits))
118	return (x >> start) & ((`1U` << nbits) - `1U`);
119
120	// Otherwise, shift and mask
121	return fse_mask_lsb32(x >> start, nbits);
122	}
123
124	#pragma mark - Bit stream
125
126	// I/O streams
127	// The streams can be shared between several FSE encoders/decoders, which is why
128	// they are not in the state struct
129
130	/! @abstract Output stream, 64-bit accum. /
131	typedef struct {
132	uint64_t accum; // Output bits
133	fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
134	} fse_out_stream64;
135
136	/! @abstract Output stream, 32-bit accum. /
137	typedef struct {
138	uint32_t accum; // Output bits
139	fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
140	} fse_out_stream32;
141
142	/! @abstract Object representing an input stream. /
143	typedef struct {
144	uint64_t accum; // Input bits
145	fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
146	} fse_in_stream64;
147
148	/! @abstract Object representing an input stream. /
149	typedef struct {
150	uint32_t accum; // Input bits
151	fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0
152	} fse_in_stream32;
153
154	/! @abstract Initialize an output stream object. /
155	FSE_INLINE void fse_out_init64(fse_out_stream64 *s) {
156	s->accum = `0`;
157	s->accum_nbits = `0`;
158	}
159
160	/! @abstract Initialize an output stream object. /
161	FSE_INLINE void fse_out_init32(fse_out_stream32 *s) {
162	s->accum = `0`;
163	s->accum_nbits = `0`;
164	}
165
166	/! @abstract Write full bytes from the accumulator to output buffer, ensuring*
167	* accum_nbits is in [0, 7].
168	* We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all
169	* cases.
170	* @note pbuf is incremented by the number of written bytes. /
171	FSE_INLINE void fse_out_flush64(fse_out_stream64 s, uint8_t *pbuf) {
172	fse_bit_count nbits =
173	s->accum_nbits & -`8`; // number of bits written, multiple of 8
174
175	// Write 8 bytes of current accumulator
176	memcpy(*pbuf, &(s->accum), `8`);
177	pbuf += (nbits >> `3`); // bytes*
178
179	// Update state
180	s->accum >>= nbits; // remove nbits
181	s->accum_nbits -= nbits;
182
183	assert(s->accum_nbits >= `0` && s->accum_nbits <= `7`);
184	assert(s->accum_nbits == `64` \|\| (s->accum >> s->accum_nbits) == `0`);
185	}
186
187	/! @abstract Write full bytes from the accumulator to output buffer, ensuring*
188	* accum_nbits is in [0, 7].
189	* We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all
190	* cases.
191	* @note pbuf is incremented by the number of written bytes. /
192	FSE_INLINE void fse_out_flush32(fse_out_stream32 s, uint8_t *pbuf) {
193	fse_bit_count nbits =
194	s->accum_nbits & -`8`; // number of bits written, multiple of 8
195
196	// Write 4 bytes of current accumulator
197	memcpy(*pbuf, &(s->accum), `4`);
198	pbuf += (nbits >> `3`); // bytes*
199
200	// Update state
201	s->accum >>= nbits; // remove nbits
202	s->accum_nbits -= nbits;
203
204	assert(s->accum_nbits >= `0` && s->accum_nbits <= `7`);
205	assert(s->accum_nbits == `32` \|\| (s->accum >> s->accum_nbits) == `0`);
206	}
207
208	/! @abstract Write the last bytes from the accumulator to output buffer,*
209	* ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed.
210	* We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all
211	* cases.
212	* @note pbuf is incremented by the number of written bytes. /
213	FSE_INLINE void fse_out_finish64(fse_out_stream64 s, uint8_t *pbuf) {
214	fse_bit_count nbits =
215	(s->accum_nbits + `7`) & -`8`; // number of bits written, multiple of 8
216
217	// Write 8 bytes of current accumulator
218	memcpy(*pbuf, &(s->accum), `8`);
219	pbuf += (nbits >> `3`); // bytes*
220
221	// Update state
222	s->accum = `0`; // remove nbits
223	s->accum_nbits -= nbits;
224
225	assert(s->accum_nbits >= -`7` && s->accum_nbits <= `0`);
226	}
227
228	/! @abstract Write the last bytes from the accumulator to output buffer,*
229	* ensuring accum_nbits is in [-7, 0]. Bits are padded with 0 if needed.
230	* We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all
231	* cases.
232	* @note pbuf is incremented by the number of written bytes. /
233	FSE_INLINE void fse_out_finish32(fse_out_stream32 s, uint8_t *pbuf) {
234	fse_bit_count nbits =
235	(s->accum_nbits + `7`) & -`8`; // number of bits written, multiple of 8
236
237	// Write 8 bytes of current accumulator
238	memcpy(*pbuf, &(s->accum), `4`);
239	pbuf += (nbits >> `3`); // bytes*
240
241	// Update state
242	s->accum = `0`; // remove nbits
243	s->accum_nbits -= nbits;
244
245	assert(s->accum_nbits >= -`7` && s->accum_nbits <= `0`);
246	}
247
248	/! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have:*
249	* 0 <= b < 2^n, and N + s->accum_nbits <= 64.
250	* @note The caller must ensure out_flush is called \b before the accumulator
251	* overflows to more than 64 bits. */
252	FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n,
253	uint64_t b) {
254	s->accum \|= b << s->accum_nbits;
255	s->accum_nbits += n;
256
257	assert(s->accum_nbits >= `0` && s->accum_nbits <= `64`);
258	assert(s->accum_nbits == `64` \|\| (s->accum >> s->accum_nbits) == `0`);
259	}
260
261	/! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have:*
262	* 0 <= n < 2^n, and n + s->accum_nbits <= 32.
263	* @note The caller must ensure out_flush is called \b before the accumulator
264	* overflows to more than 32 bits. */
265	FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n,
266	uint32_t b) {
267	s->accum \|= b << s->accum_nbits;
268	s->accum_nbits += n;
269
270	assert(s->accum_nbits >= `0` && s->accum_nbits <= `32`);
271	assert(s->accum_nbits == `32` \|\| (s->accum >> s->accum_nbits) == `0`);
272	}
273
274	#if FSE_IOSTREAM_64
275	#define DEBUG_CHECK_INPUT_STREAM_PARAMETERS \
276	assert(s->accum_nbits >= 56 && s->accum_nbits < 64); \
277	assert((s->accum >> s->accum_nbits) == 0);
278	#else
279	#define DEBUG_CHECK_INPUT_STREAM_PARAMETERS \
280	assert(s->accum_nbits >= 24 && s->accum_nbits < 32); \
281	assert((s->accum >> s->accum_nbits) == 0);
282	#endif
283
284	/! @abstract Initialize the fse input stream so that accum holds between 56*
285	* and 63 bits. We never want to have 64 bits in the stream, because that allows
286	* us to avoid a special case in the fse_in_pull function (eliminating an
287	* unpredictable branch), while not requiring any additional fse_flush
288	* operations. This is why we have the special case for n == 0 (in which case
289	* we want to load only 7 bytes instead of 8). */
290	FSE_INLINE int fse_in_checked_init64(fse_in_stream64 *s, fse_bit_count n,
291	const uint8_t **pbuf,
292	const uint8_t *buf_start) {
293	if (n) {
294	if (*pbuf < buf_start + `8`)
295	return -`1`; // out of range
296	*pbuf -= `8`;
297	memcpy(&(s->accum), *pbuf, `8`);
298	s->accum_nbits = n + `64`;
299	} else {
300	if (*pbuf < buf_start + `7`)
301	return -`1`; // out of range
302	*pbuf -= `7`;
303	memcpy(&(s->accum), *pbuf, `7`);
304	s->accum &= `0xffffffffffffff`;
305	s->accum_nbits = n + `56`;
306	}
307
308	if ((s->accum_nbits < `56` \|\| s->accum_nbits >= `64`) \|\|
309	((s->accum >> s->accum_nbits) != `0`)) {
310	return -`1`; // the incoming input is wrong (encoder should have zeroed the
311	// upper bits)
312	}
313
314	return `0`; // OK
315	}
316
317	/! @abstract Identical to previous function, but for 32-bit operation*
318	* (resulting bit count is between 24 and 31 bits). */
319	FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n,
320	const uint8_t **pbuf,
321	const uint8_t *buf_start) {
322	if (n) {
323	if (*pbuf < buf_start + `4`)
324	return -`1`; // out of range
325	*pbuf -= `4`;
326	memcpy(&(s->accum), *pbuf, `4`);
327	s->accum_nbits = n + `32`;
328	} else {
329	if (*pbuf < buf_start + `3`)
330	return -`1`; // out of range
331	*pbuf -= `3`;
332	memcpy(&(s->accum), *pbuf, `3`);
333	s->accum &= `0xffffff`;
334	s->accum_nbits = n + `24`;
335	}
336
337	if ((s->accum_nbits < `24` \|\| s->accum_nbits >= `32`) \|\|
338	((s->accum >> s->accum_nbits) != `0`)) {
339	return -`1`; // the incoming input is wrong (encoder should have zeroed the
340	// upper bits)
341	}
342
343	return `0`; // OK
344	}
345
346	/! @abstract Read in new bytes from buffer to ensure that we have a full*
347	* complement of bits in the stream object (again, between 56 and 63 bits).
348	* checking the new value of \c *pbuf remains >= \c buf_start.
349	* @return 0 if OK.
350	* @return -1 on failure. */
351	FSE_INLINE int fse_in_checked_flush64(fse_in_stream64 s, const* uint8_t **pbuf,
352	const uint8_t *buf_start) {
353	// Get number of bits to add to bring us into the desired range.
354	fse_bit_count nbits = (`63` - s->accum_nbits) & -`8`;
355	// Convert bits to bytes and decrement buffer address, then load new data.
356	const uint8_t buf = (pbuf) - (nbits >> `3`);
357	if (buf < buf_start) {
358	return -`1`; // out of range
359	}
360	*pbuf = buf;
361	uint64_t incoming;
362	memcpy(&incoming, buf, `8`);
363	// Update the state object and verify its validity (in DEBUG).
364	s->accum = (s->accum << nbits) \| fse_mask_lsb64(incoming, nbits);
365	s->accum_nbits += nbits;
366	DEBUG_CHECK_INPUT_STREAM_PARAMETERS
367	return `0`; // OK
368	}
369
370	/! @abstract Identical to previous function (but again, we're only filling*
371	* a 32-bit field with between 24 and 31 bits). */
372	FSE_INLINE int fse_in_checked_flush32(fse_in_stream32 s, const* uint8_t **pbuf,
373	const uint8_t *buf_start) {
374	// Get number of bits to add to bring us into the desired range.
375	fse_bit_count nbits = (`31` - s->accum_nbits) & -`8`;
376
377	if (nbits > `0`) {
378	// Convert bits to bytes and decrement buffer address, then load new data.
379	const uint8_t buf = (pbuf) - (nbits >> `3`);
380	if (buf < buf_start) {
381	return -`1`; // out of range
382	}
383
384	*pbuf = buf;
385
386	uint32_t incoming = ((uint32_t )buf);
387
388	// Update the state object and verify its validity (in DEBUG).
389	s->accum = (s->accum << nbits) \| fse_mask_lsb32(incoming, nbits);
390	s->accum_nbits += nbits;
391	}
392	DEBUG_CHECK_INPUT_STREAM_PARAMETERS
393	return `0`; // OK
394	}
395
396	/! @abstract Pull n bits out of the fse stream object. /
397	FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) {
398	assert(n >= `0` && n <= s->accum_nbits);
399	s->accum_nbits -= n;
400	uint64_t result = s->accum >> s->accum_nbits;
401	s->accum = fse_mask_lsb64(s->accum, s->accum_nbits);
402	return result;
403	}
404
405	/! @abstract Pull n bits out of the fse stream object. /
406	FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) {
407	assert(n >= `0` && n <= s->accum_nbits);
408	s->accum_nbits -= n;
409	uint32_t result = s->accum >> s->accum_nbits;
410	s->accum = fse_mask_lsb32(s->accum, s->accum_nbits);
411	return result;
412	}
413
414	#pragma mark - Encode/Decode
415
416	// Map to 32/64-bit implementations and types for I/O
417	#if FSE_IOSTREAM_64
418
419	typedef uint64_t fse_bits;
420	typedef fse_out_stream64 fse_out_stream;
421	typedef fse_in_stream64 fse_in_stream;
422	#define fse_mask_lsb fse_mask_lsb64
423	#define fse_extract_bits fse_extract_bits64
424	#define fse_out_init fse_out_init64
425	#define fse_out_flush fse_out_flush64
426	#define fse_out_finish fse_out_finish64
427	#define fse_out_push fse_out_push64
428	#define fse_in_init fse_in_checked_init64
429	#define fse_in_checked_init fse_in_checked_init64
430	#define fse_in_flush fse_in_checked_flush64
431	#define fse_in_checked_flush fse_in_checked_flush64
432	#define fse_in_flush2(_unused, _parameters, _unused2) 0 /* nothing */
433	#define fse_in_checked_flush2(_unused, _parameters) /* nothing */
434	#define fse_in_pull fse_in_pull64
435
436	#else
437
438	typedef uint32_t fse_bits;
439	typedef fse_out_stream32 fse_out_stream;
440	typedef fse_in_stream32 fse_in_stream;
441	#define fse_mask_lsb fse_mask_lsb32
442	#define fse_extract_bits fse_extract_bits32
443	#define fse_out_init fse_out_init32
444	#define fse_out_flush fse_out_flush32
445	#define fse_out_finish fse_out_finish32
446	#define fse_out_push fse_out_push32
447	#define fse_in_init fse_in_checked_init32
448	#define fse_in_checked_init fse_in_checked_init32
449	#define fse_in_flush fse_in_checked_flush32
450	#define fse_in_checked_flush fse_in_checked_flush32
451	#define fse_in_flush2 fse_in_checked_flush32
452	#define fse_in_checked_flush2 fse_in_checked_flush32
453	#define fse_in_pull fse_in_pull32
454
455	#endif
456
457	/! @abstract Entry for one symbol in the encoder table (64b). /
458	typedef struct {
459	int16_t s0; // First state requiring a K-bit shift
460	int16_t k; // States S >= S0 are shifted K bits. States S < S0 are
461	// shifted K-1 bits
462	int16_t delta0; // Relative increment used to compute next state if S >= S0
463	int16_t delta1; // Relative increment used to compute next state if S < S0
464	} fse_encoder_entry;
465
466	/! @abstract Entry for one state in the decoder table (32b). /
467	typedef struct { // DO NOT REORDER THE FIELDS
468	int8_t k; // Number of bits to read
469	uint8_t symbol; // Emitted symbol
470	int16_t delta; // Signed increment used to compute next state (+bias)
471	} fse_decoder_entry;
472
473	/! @abstract Entry for one state in the value decoder table (64b). /
474	typedef struct { // DO NOT REORDER THE FIELDS
475	uint8_t total_bits; // state bits + extra value bits = shift for next decode
476	uint8_t value_bits; // extra value bits
477	int16_t delta; // state base (delta)
478	int32_t vbase; // value base
479	} fse_value_decoder_entry;
480
481	/! @abstract Encode SYMBOL using the encoder table, and update \c pstate,
482	* \c out.
483	* @note The caller must ensure we have enough bits available in the output
484	* stream accumulator. */
485	FSE_INLINE void fse_encode(fse_state *__restrict pstate,
486	const fse_encoder_entry *__restrict encoder_table,
487	fse_out_stream *__restrict out, uint8_t symbol) {
488	int s = *pstate;
489	fse_encoder_entry e = encoder_table[symbol];
490	int s0 = e.s0;
491	int k = e.k;
492	int delta0 = e.delta0;
493	int delta1 = e.delta1;
494
495	// Number of bits to write
496	int hi = s >= s0;
497	fse_bit_count nbits = hi ? k : (k - `1`);
498	fse_state delta = hi ? delta0 : delta1;
499
500	// Write lower NBITS of state
501	fse_bits b = fse_mask_lsb(s, nbits);
502	fse_out_push(out, nbits, b);
503
504	// Update state with remaining bits and delta
505	*pstate = delta + (s >> nbits);
506	}
507
508	/! @abstract Decode and return symbol using the decoder table, and update*
509	* \c *pstate, \c in.
510	* @note The caller must ensure we have enough bits available in the input
511	* stream accumulator. */
512	FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate,
513	const int32_t *__restrict decoder_table,
514	fse_in_stream *__restrict in) {
515	int32_t e = decoder_table[*pstate];
516
517	// Update state from K bits of input + DELTA
518	*pstate = (fse_state)(e >> `16`) + (fse_state)fse_in_pull(in, e & `0xff`);
519
520	// Return the symbol for this state
521	return fse_extract_bits(e, `8`, `8`); // symbol
522	}
523
524	/! @abstract Decode and return value using the decoder table, and update \c*
525	* *pstate, \c in.
526	* \c value_decoder_table[nstates]
527	* @note The caller must ensure we have enough bits available in the input
528	* stream accumulator. */
529	FSE_INLINE int32_t
530	fse_value_decode(fse_state *__restrict pstate,
531	const fse_value_decoder_entry *value_decoder_table,
532	fse_in_stream *__restrict in) {
533	fse_value_decoder_entry entry = value_decoder_table[*pstate];
534	uint32_t state_and_value_bits = (uint32_t)fse_in_pull(in, entry.total_bits);
535	*pstate =
536	(fse_state)(entry.delta + (state_and_value_bits >> entry.value_bits));
537	return (int32_t)(entry.vbase +
538	fse_mask_lsb(state_and_value_bits, entry.value_bits));
539	}
540
541	#pragma mark - Tables
542
543	// IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder
544	// tables shall be initialized with the same parameters, including the
545	// FREQ[NSYMBOL] array.
546	//
547
548	/! @abstract Sanity check on frequency table, verify sum of \c freq*
549	* is <= \c number_of_states. */
550	FSE_INLINE int fse_check_freq(const uint16_t *freq_table,
551	const size_t table_size,
552	const size_t number_of_states) {
553	size_t sum_of_freq = `0`;
554	for (int i = `0`; i < table_size; i++) {
555	sum_of_freq += freq_table[i];
556	}
557	return (sum_of_freq > number_of_states) ? -`1` : `0`;
558	}
559
560	/! @abstract Initialize encoder table \c t[nsymbols].*
561	*
562	* @param nstates
563	* sum \c freq[i]; the number of states (a power of 2).
564	*
565	* @param nsymbols
566	* the number of symbols.
567	*
568	* @param freq[nsymbols]
569	* is a normalized histogram of symbol frequencies, with \c freq[i] >= 0.
570	* Some symbols may have a 0 frequency. In that case they should not be
571	* present in the data.
572	*/
573	void fse_init_encoder_table(int nstates, int nsymbols,
574	const uint16_t *__restrict freq,
575	fse_encoder_entry *__restrict t);
576
577	/! @abstract Initialize decoder table \c t[nstates].*
578	*
579	* @param nstates
580	* sum \c freq[i]; the number of states (a power of 2).
581	*
582	* @param nsymbols
583	* the number of symbols.
584	*
585	* @param feq[nsymbols]
586	* a normalized histogram of symbol frequencies, with \c freq[i] >= 0.
587	* Some symbols may have a 0 frequency. In that case they should not be
588	* present in the data.
589	*
590	* @return 0 if OK.
591	* @return -1 on failure.
592	*/
593	int fse_init_decoder_table(int nstates, int nsymbols,
594	const uint16_t *__restrict freq,
595	int32_t *__restrict t);
596
597	/! @abstract Initialize value decoder table \c t[nstates].*
598	*
599	* @param nstates
600	* sum \cfreq[i]; the number of states (a power of 2).
601	*
602	* @param nsymbols
603	* the number of symbols.
604	*
605	* @param freq[nsymbols]
606	* a normalized histogram of symbol frequencies, with \c freq[i] >= 0.
607	* \c symbol_vbits[nsymbols] and \c symbol_vbase[nsymbols] are the number of
608	* value bits to read and the base value for each symbol.
609	* Some symbols may have a 0 frequency. In that case they should not be
610	* present in the data.
611	*/
612	void fse_init_value_decoder_table(int nstates, int nsymbols,
613	const uint16_t *__restrict freq,
614	const uint8_t *__restrict symbol_vbits,
615	const int32_t *__restrict symbol_vbase,
616	fse_value_decoder_entry *__restrict t);
617
618	/! @abstract Normalize a table \c t[nsymbols] of occurrences to*
619	* \c freq[nsymbols]. */
620	void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t,
621	uint16_t *__restrict freq);
622
623
624

Browse the source code of lzfse/src/lzfse_fse.h