lzfse_encode_base.c source code [lzfse/src/lzfse_encode_base.c]

1	/*
2	Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4	Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6	1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8	2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9	in the documentation and/or other materials provided with the distribution.
10
11	3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12	from this software without specific prior written permission.
13
14	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16	COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18	HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20	*/
21
22	// LZFSE encoder
23
24	#include "lzfse_internal.h"
25	#include "lzfse_encode_tables.h"
26
27	/! @abstract Get hash in range [0, LZFSE_ENCODE_HASH_VALUES-1] from 4 bytes in X. /
28	static inline uint32_t hashX(uint32_t x) {
29	return (x * `2654435761U`) >>
30	(`32` - LZFSE_ENCODE_HASH_BITS); // Knuth multiplicative hash
31	}
32
33	/! @abstract Return value with all 0 except nbits<=32 unsigned bits from V*
34	* at bit offset OFFSET.
35	* V is assumed to fit on nbits bits. */
36	static inline uint64_t setField(uint32_t v, int offset, int nbits) {
37	assert(offset + nbits < `64` && offset >= `0` && nbits <= `32`);
38	assert(nbits == `32` \|\| (v < (`1U` << nbits)));
39	return ((uint64_t)v << (uint64_t)offset);
40	}
41
42	/! @abstract Encode all fields, except freq, from a*
43	* lzfse_compressed_block_header_v1 to a lzfse_compressed_block_header_v2.
44	* All but the header_size and freq fields of the output are modified. */
45	static inline void
46	lzfse_encode_v1_state(lzfse_compressed_block_header_v2 *out,
47	const lzfse_compressed_block_header_v1 *in) {
48	out->magic = LZFSE_COMPRESSEDV2_BLOCK_MAGIC;
49	out->n_raw_bytes = in->n_raw_bytes;
50
51	// Literal state
52	out->packed_fields[`0`] = setField(in->n_literals, `0`, `20`) \|
53	setField(in->n_literal_payload_bytes, `20`, `20`) \|
54	setField(in->n_matches, `40`, `20`) \|
55	setField(`7` + in->literal_bits, `60`, `3`);
56	out->packed_fields[`1`] = setField(in->literal_state[`0`], `0`, `10`) \|
57	setField(in->literal_state[`1`], `10`, `10`) \|
58	setField(in->literal_state[`2`], `20`, `10`) \|
59	setField(in->literal_state[`3`], `30`, `10`) \|
60	setField(in->n_lmd_payload_bytes, `40`, `20`) \|
61	setField(`7` + in->lmd_bits, `60`, `3`);
62	out->packed_fields[`2`] = out->packed_fields[`2`] // header_size already stored in v[2]
63	\| setField(in->l_state, `32`, `10`) \| setField(in->m_state, `42`, `10`) \|
64	setField(in->d_state, `52`, `10`);
65	}
66
67	/! @abstract Encode an entry value in a freq table. Return bits, and sets*
68	* nbits to the number of bits to serialize. /
69	static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) {
70	// Fixed Huffman code, bits are read from LSB.
71	// Note that we rely on the position of the first '0' bit providing the number
72	// of bits.
73	switch (value) {
74	case `0`:
75	*nbits = `2`;
76	return `0`; // 0.0
77	case `1`:
78	*nbits = `2`;
79	return `2`; // 1.0
80	case `2`:
81	*nbits = `3`;
82	return `1`; // 0.01
83	case `3`:
84	*nbits = `3`;
85	return `5`; // 1.01
86	case `4`:
87	*nbits = `5`;
88	return `3`; // 00.011
89	case `5`:
90	*nbits = `5`;
91	return `11`; // 01.011
92	case `6`:
93	*nbits = `5`;
94	return `19`; // 10.011
95	case `7`:
96	*nbits = `5`;
97	return `27`; // 11.011
98	default:
99	break;
100	}
101	if (value < `24`) {
102	nbits = `8`; // 4+4*
103	return `7` + ((value - `8`) << `4`); // xxxx.0111
104	}
105	// 24..1047
106	nbits = `14`; // 4+10*
107	return ((value - `24`) << `4`) + `15`; // xxxxxxxxxx.1111
108	}
109
110	/! @abstract Encode all tables from a lzfse_compressed_block_header_v1*
111	* to a lzfse_compressed_block_header_v2.
112	* Only the header_size and freq fields of the output are modified.
113	* @return Size of the lzfse_compressed_block_header_v2 */
114	static inline size_t
115	lzfse_encode_v1_freq_table(lzfse_compressed_block_header_v2 *out,
116	const lzfse_compressed_block_header_v1 *in) {
117	uint32_t accum = `0`;
118	int accum_nbits = `0`;
119	const uint16_t src = &(in->l_freq[`0`]); // first value of first table (struct*
120	// will not be modified, so this code
121	// will remain valid)
122	uint8_t *dst = &(out->freq[`0`]);
123	for (int i = `0`; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS +
124	LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS;
125	i++) {
126	// Encode one value to accum
127	int nbits = `0`;
128	uint32_t bits = lzfse_encode_v1_freq_value(src[i], &nbits);
129	assert(bits < (`1` << nbits));
130	accum \|= bits << accum_nbits;
131	accum_nbits += nbits;
132
133	// Store bytes from accum to output buffer
134	while (accum_nbits >= `8`) {
135	*dst = (uint8_t)(accum & `0xff`);
136	accum >>= `8`;
137	accum_nbits -= `8`;
138	dst++;
139	}
140	}
141	// Store final byte if needed
142	if (accum_nbits > `0`) {
143	*dst = (uint8_t)(accum & `0xff`);
144	dst++;
145	}
146
147	// Return final size of out
148	uint32_t header_size = (uint32_t)(dst - (uint8_t *)out);
149	out->packed_fields[`0`] = `0`;
150	out->packed_fields[`1`] = `0`;
151	out->packed_fields[`2`] = setField(header_size, `0`, `32`);
152
153	return header_size;
154	}
155
156	// We need to limit forward match length to make sure it won't split into a too
157	// large number of LMD.
158	// The limit itself is quite large, so it doesn't really impact compression
159	// ratio.
160	// The matches may still be expanded backwards by a few bytes, so the final
161	// length may be greater than this limit, which is OK.
162	#define LZFSE_ENCODE_MAX_MATCH_LENGTH (100 * LZFSE_ENCODE_MAX_M_VALUE)
163
164	// ===============================================================
165	// Encoder back end
166
167	/! @abstract Encode matches stored in STATE into a compressed/uncompressed block.*
168	* @return LZFSE_STATUS_OK on success.
169	* @return LZFSE_STATUS_DST_FULL and restore initial state if output buffer is
170	* full. */
171	static int lzfse_encode_matches(lzfse_encoder_state *s) {
172	if (s->n_literals == `0` && s->n_matches == `0`)
173	return LZFSE_STATUS_OK; // nothing to store, OK
174
175	uint32_t l_occ[LZFSE_ENCODE_L_SYMBOLS];
176	uint32_t m_occ[LZFSE_ENCODE_M_SYMBOLS];
177	uint32_t d_occ[LZFSE_ENCODE_D_SYMBOLS];
178	uint32_t literal_occ[LZFSE_ENCODE_LITERAL_SYMBOLS];
179	fse_encoder_entry l_encoder[LZFSE_ENCODE_L_SYMBOLS];
180	fse_encoder_entry m_encoder[LZFSE_ENCODE_M_SYMBOLS];
181	fse_encoder_entry d_encoder[LZFSE_ENCODE_D_SYMBOLS];
182	fse_encoder_entry literal_encoder[LZFSE_ENCODE_LITERAL_SYMBOLS];
183	int ok = `1`;
184	lzfse_compressed_block_header_v1 header1 = {`0`};
185	lzfse_compressed_block_header_v2 *header2 = `0`;
186
187	// Keep initial state to be able to restore it if DST full
188	uint8_t *dst0 = s->dst;
189	uint32_t n_literals0 = s->n_literals;
190
191	// Add 0x00 literals until n_literals multiple of 4, since we encode 4
192	// interleaved literal streams.
193	while (s->n_literals & `3`) {
194	uint32_t n = s->n_literals++;
195	s->literals[n] = `0`;
196	}
197
198	// Encode previous distance
199	uint32_t d_prev = `0`;
200	for (uint32_t i = `0`; i < s->n_matches; i++) {
201	uint32_t d = s->d_values[i];
202	if (d == d_prev)
203	s->d_values[i] = `0`;
204	else
205	d_prev = d;
206	}
207
208	// Clear occurrence tables
209	memset(l_occ, `0`, sizeof(l_occ));
210	memset(m_occ, `0`, sizeof(m_occ));
211	memset(d_occ, `0`, sizeof(d_occ));
212	memset(literal_occ, `0`, sizeof(literal_occ));
213
214	// Update occurrence tables in all 4 streams (L,M,D,literals)
215	uint32_t l_sum = `0`;
216	uint32_t m_sum = `0`;
217	for (uint32_t i = `0`; i < s->n_matches; i++) {
218	uint32_t l = s->l_values[i];
219	l_sum += l;
220	l_occ[l_base_from_value(l)]++;
221	}
222	for (uint32_t i = `0`; i < s->n_matches; i++) {
223	uint32_t m = s->m_values[i];
224	m_sum += m;
225	m_occ[m_base_from_value(m)]++;
226	}
227	for (uint32_t i = `0`; i < s->n_matches; i++)
228	d_occ[d_base_from_value(s->d_values[i])]++;
229	for (uint32_t i = `0`; i < s->n_literals; i++)
230	literal_occ[s->literals[i]]++;
231
232	// Make sure we have enough room for a _full_ V2 header
233	if (s->dst + sizeof(lzfse_compressed_block_header_v2) > s->dst_end) {
234	ok = `0`;
235	goto END;
236	}
237	header2 = (lzfse_compressed_block_header_v2 *)(s->dst);
238
239	// Setup header V1
240	header1.magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC;
241	header1.n_raw_bytes = m_sum + l_sum;
242	header1.n_matches = s->n_matches;
243	header1.n_literals = s->n_literals;
244
245	// Normalize occurrence tables to freq tables
246	fse_normalize_freq(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, l_occ,
247	header1.l_freq);
248	fse_normalize_freq(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, m_occ,
249	header1.m_freq);
250	fse_normalize_freq(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, d_occ,
251	header1.d_freq);
252	fse_normalize_freq(LZFSE_ENCODE_LITERAL_STATES, LZFSE_ENCODE_LITERAL_SYMBOLS,
253	literal_occ, header1.literal_freq);
254
255	// Compress freq tables to V2 header, and get actual size of V2 header
256	s->dst += lzfse_encode_v1_freq_table(header2, &header1);
257
258	// Initialize encoder tables from freq tables
259	fse_init_encoder_table(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS,
260	header1.l_freq, l_encoder);
261	fse_init_encoder_table(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS,
262	header1.m_freq, m_encoder);
263	fse_init_encoder_table(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS,
264	header1.d_freq, d_encoder);
265	fse_init_encoder_table(LZFSE_ENCODE_LITERAL_STATES,
266	LZFSE_ENCODE_LITERAL_SYMBOLS, header1.literal_freq,
267	literal_encoder);
268
269	// Encode literals
270	{
271	fse_out_stream out;
272	fse_out_init(&out);
273	fse_state state0, state1, state2, state3;
274	state0 = state1 = state2 = state3 = `0`;
275
276	uint8_t *buf = s->dst;
277	uint32_t i = s->n_literals; // I multiple of 4
278	// We encode starting from the last literal so we can decode starting from
279	// the first
280	while (i > `0`) {
281	if (buf + `16` > s->dst_end) {
282	ok = `0`;
283	goto END;
284	} // out full
285	i -= `4`;
286	fse_encode(&state3, literal_encoder, &out, s->literals[i + `3`]); // 10b
287	fse_encode(&state2, literal_encoder, &out, s->literals[i + `2`]); // 10b
288	#if !FSE_IOSTREAM_64
289	fse_out_flush(&out, &buf);
290	#endif
291	fse_encode(&state1, literal_encoder, &out, s->literals[i + `1`]); // 10b
292	fse_encode(&state0, literal_encoder, &out, s->literals[i + `0`]); // 10b
293	fse_out_flush(&out, &buf);
294	}
295	fse_out_finish(&out, &buf);
296
297	// Update header with final encoder state
298	header1.literal_bits = out.accum_nbits; // [-7, 0]
299	header1.n_literal_payload_bytes = (uint32_t)(buf - s->dst);
300	header1.literal_state[`0`] = state0;
301	header1.literal_state[`1`] = state1;
302	header1.literal_state[`2`] = state2;
303	header1.literal_state[`3`] = state3;
304
305	// Update state
306	s->dst = buf;
307
308	} // literals
309
310	// Encode L,M,D
311	{
312	fse_out_stream out;
313	fse_out_init(&out);
314	fse_state l_state, m_state, d_state;
315	l_state = m_state = d_state = `0`;
316
317	uint8_t *buf = s->dst;
318	uint32_t i = s->n_matches;
319
320	// Add 8 padding bytes to the L,M,D payload
321	if (buf + `8` > s->dst_end) {
322	ok = `0`;
323	goto END;
324	} // out full
325	store8(buf, `0`);
326	buf += `8`;
327
328	// We encode starting from the last match so we can decode starting from the
329	// first
330	while (i > `0`) {
331	if (buf + `16` > s->dst_end) {
332	ok = `0`;
333	goto END;
334	} // out full
335	i -= `1`;
336
337	// D requires 23b max
338	int32_t d_value = s->d_values[i];
339	uint8_t d_symbol = d_base_from_value(d_value);
340	int32_t d_nbits = d_extra_bits[d_symbol];
341	int32_t d_bits = d_value - d_base_value[d_symbol];
342	fse_out_push(&out, d_nbits, d_bits);
343	fse_encode(&d_state, d_encoder, &out, d_symbol);
344	#if !FSE_IOSTREAM_64
345	fse_out_flush(&out, &buf);
346	#endif
347
348	// M requires 17b max
349	int32_t m_value = s->m_values[i];
350	uint8_t m_symbol = m_base_from_value(m_value);
351	int32_t m_nbits = m_extra_bits[m_symbol];
352	int32_t m_bits = m_value - m_base_value[m_symbol];
353	fse_out_push(&out, m_nbits, m_bits);
354	fse_encode(&m_state, m_encoder, &out, m_symbol);
355	#if !FSE_IOSTREAM_64
356	fse_out_flush(&out, &buf);
357	#endif
358
359	// L requires 14b max
360	int32_t l_value = s->l_values[i];
361	uint8_t l_symbol = l_base_from_value(l_value);
362	int32_t l_nbits = l_extra_bits[l_symbol];
363	int32_t l_bits = l_value - l_base_value[l_symbol];
364	fse_out_push(&out, l_nbits, l_bits);
365	fse_encode(&l_state, l_encoder, &out, l_symbol);
366	fse_out_flush(&out, &buf);
367	}
368	fse_out_finish(&out, &buf);
369
370	// Update header with final encoder state
371	header1.n_lmd_payload_bytes = (uint32_t)(buf - s->dst);
372	header1.lmd_bits = out.accum_nbits; // [-7, 0]
373	header1.l_state = l_state;
374	header1.m_state = m_state;
375	header1.d_state = d_state;
376
377	// Update state
378	s->dst = buf;
379
380	} // L,M,D
381
382	// Final state update, here we had enough space in DST, and are not going to
383	// revert state
384	s->n_literals = `0`;
385	s->n_matches = `0`;
386
387	// Final payload size
388	header1.n_payload_bytes =
389	header1.n_literal_payload_bytes + header1.n_lmd_payload_bytes;
390
391	// Encode state info in V2 header (we previously encoded the tables, now we
392	// set the other fields)
393	lzfse_encode_v1_state(header2, &header1);
394
395	END:
396	if (!ok) {
397	// Revert state, DST was full
398
399	// Revert the d_prev encoding
400	uint32_t d_prev = `0`;
401	for (uint32_t i = `0`; i < s->n_matches; i++) {
402	uint32_t d = s->d_values[i];
403	if (d == `0`)
404	s->d_values[i] = d_prev;
405	else
406	d_prev = d;
407	}
408
409	// Revert literal count
410	s->n_literals = n_literals0;
411
412	// Revert DST
413	s->dst = dst0;
414
415	return LZFSE_STATUS_DST_FULL; // DST full
416	}
417
418	return LZFSE_STATUS_OK;
419	}
420
421	/! @abstract Push a L,M,D match into the STATE.*
422	* @return LZFSE_STATUS_OK if OK.
423	* @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of
424	* the buffers is full. In that case the state is not modified. */
425	static inline int lzfse_push_lmd(lzfse_encoder_state *s, uint32_t L,
426	uint32_t M, uint32_t D) {
427	// Check if we have enough space to push the match (we add some margin to copy
428	// literals faster here, and round final count later)
429	if (s->n_matches + `1` + `8` > LZFSE_MATCHES_PER_BLOCK)
430	return LZFSE_STATUS_DST_FULL; // state full
431	if (s->n_literals + L + `16` > LZFSE_LITERALS_PER_BLOCK)
432	return LZFSE_STATUS_DST_FULL; // state full
433
434	// Store match
435	uint32_t n = s->n_matches++;
436	s->l_values[n] = L;
437	s->m_values[n] = M;
438	s->d_values[n] = D;
439
440	// Store literals
441	uint8_t *dst = s->literals + s->n_literals;
442	const uint8_t *src = s->src + s->src_literal;
443	uint8_t *dst_end = dst + L;
444	if (s->src_literal + L + `16` > s->src_end) {
445	// Careful at the end of SRC, we can't read 16 bytes
446	if (L > `0`)
447	memcpy(dst, src, L);
448	} else {
449	copy16(dst, src);
450	dst += `16`;
451	src += `16`;
452	while (dst < dst_end) {
453	copy16(dst, src);
454	dst += `16`;
455	src += `16`;
456	}
457	}
458	s->n_literals += L;
459
460	// Update state
461	s->src_literal += L + M;
462
463	return LZFSE_STATUS_OK;
464	}
465
466	/! @abstract Split MATCH into one or more L,M,D parts, and push to STATE.*
467	* @return LZFSE_STATUS_OK if OK.
468	* @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of the
469	* buffers is full. In that case the state is not modified. */
470	static int lzfse_push_match(lzfse_encoder_state s, const* lzfse_match *match) {
471	// Save the initial n_matches, n_literals, src_literal
472	uint32_t n_matches0 = s->n_matches;
473	uint32_t n_literals0 = s->n_literals;
474	lzfse_offset src_literals0 = s->src_literal;
475
476	// L,M,D
477	uint32_t L = (uint32_t)(match->pos - s->src_literal); // literal count
478	uint32_t M = match->length; // match length
479	uint32_t D = (uint32_t)(match->pos - match->ref); // match distance
480	int ok = `1`;
481
482	// Split L if too large
483	while (L > LZFSE_ENCODE_MAX_L_VALUE) {
484	if (lzfse_push_lmd(s, LZFSE_ENCODE_MAX_L_VALUE, `0`, `1`) != `0`) {
485	ok = `0`;
486	goto END;
487	} // take D=1 because most frequent, but not actually used
488	L -= LZFSE_ENCODE_MAX_L_VALUE;
489	}
490
491	// Split if M too large
492	while (M > LZFSE_ENCODE_MAX_M_VALUE) {
493	if (lzfse_push_lmd(s, L, LZFSE_ENCODE_MAX_M_VALUE, D) != `0`) {
494	ok = `0`;
495	goto END;
496	}
497	L = `0`;
498	M -= LZFSE_ENCODE_MAX_M_VALUE;
499	}
500
501	// L,M in range
502	if (L > `0` \|\| M > `0`) {
503	if (lzfse_push_lmd(s, L, M, D) != `0`) {
504	ok = `0`;
505	goto END;
506	}
507	L = M = `0`;
508	(void)L;
509	(void)M; // dead stores
510	}
511
512	END:
513	if (!ok) {
514	// Revert state
515	s->n_matches = n_matches0;
516	s->n_literals = n_literals0;
517	s->src_literal = src_literals0;
518
519	return LZFSE_STATUS_DST_FULL; // state tables full
520	}
521
522	return LZFSE_STATUS_OK; // OK
523	}
524
525	/! @abstract Backend: add MATCH to state S. Encode block if necessary, when*
526	* state is full.
527	* @return LZFSE_STATUS_OK if OK.
528	* @return LZFSE_STATUS_DST_FULL if the match can't be added, meaning one of the
529	* buffers is full. In that case the state is not modified. */
530	static int lzfse_backend_match(lzfse_encoder_state *s,
531	const lzfse_match *match) {
532	// Try to push the match in state
533	if (lzfse_push_match(s, match) == LZFSE_STATUS_OK)
534	return LZFSE_STATUS_OK; // OK, match added to state
535
536	// Here state tables are full, try to emit block
537	if (lzfse_encode_matches(s) != LZFSE_STATUS_OK)
538	return LZFSE_STATUS_DST_FULL; // DST full, match not added
539
540	// Here block has been emitted, re-try to push the match in state
541	return lzfse_push_match(s, match);
542	}
543
544	/! @abstract Backend: add L literals to state S. Encode block if necessary,*
545	* when state is full.
546	* @return LZFSE_STATUS_OK if OK.
547	* @return LZFSE_STATUS_DST_FULL if the literals can't be added, meaning one of
548	* the buffers is full. In that case the state is not modified. */
549	static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) {
550	// Create a fake match with M=0, D=1
551	lzfse_match match;
552	lzfse_offset pos = s->src_literal + L;
553	match.pos = pos;
554	match.ref = match.pos - `1`;
555	match.length = `0`;
556	return lzfse_backend_match(s, &match);
557	}
558
559	/! @abstract Backend: flush final block, and emit end of stream*
560	* @return LZFSE_STATUS_OK if OK.
561	* @return LZFSE_STATUS_DST_FULL if either the final block, or the end-of-stream
562	* can't be added, meaning one of the buffers is full. If the block was emitted,
563	* the state is updated to reflect this. Otherwise, it is left unchanged. */
564	static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) {
565	// Final match triggers write, otherwise emit blocks when we have enough
566	// matches stored
567	if (lzfse_encode_matches(s) != LZFSE_STATUS_OK)
568	return LZFSE_STATUS_DST_FULL; // DST full
569
570	// Emit end-of-stream block
571	if (s->dst + `4` > s->dst_end)
572	return LZFSE_STATUS_DST_FULL; // DST full
573	store4(s->dst, LZFSE_ENDOFSTREAM_BLOCK_MAGIC);
574	s->dst += `4`;
575
576	return LZFSE_STATUS_OK; // OK
577	}
578
579	// ===============================================================
580	// Encoder state management
581
582	/! @abstract Initialize state:*
583	* @code
584	* - hash table with all invalid pos, and value 0.
585	* - pending match to NO_MATCH.
586	* - src_literal to 0.
587	* - d_prev to 0.
588	@endcode
589	* @return LZFSE_STATUS_OK */
590	int lzfse_encode_init(lzfse_encoder_state *s) {
591	const lzfse_match NO_MATCH = {`0`};
592	lzfse_history_set line;
593	for (int i = `0`; i < LZFSE_ENCODE_HASH_WIDTH; i++) {
594	line.pos[i] = -`4` * LZFSE_ENCODE_MAX_D_VALUE; // invalid pos
595	line.value[i] = `0`;
596	}
597	// Fill table
598	for (int i = `0`; i < LZFSE_ENCODE_HASH_VALUES; i++)
599	s->history_table[i] = line;
600	s->pending = NO_MATCH;
601	s->src_literal = `0`;
602
603	return LZFSE_STATUS_OK; // OK
604	}
605
606	/! @abstract Translate state \p src forward by \p delta > 0.*
607	* Offsets in \p src are updated backwards to point to the same positions.
608	* @return LZFSE_STATUS_OK */
609	int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta) {
610	assert(delta >= `0`);
611	if (delta == `0`)
612	return LZFSE_STATUS_OK; // OK
613
614	// SRC
615	s->src += delta;
616
617	// Offsets in SRC
618	s->src_end -= delta;
619	s->src_encode_i -= delta;
620	s->src_encode_end -= delta;
621	s->src_literal -= delta;
622
623	// Pending match
624	s->pending.pos -= delta;
625	s->pending.ref -= delta;
626
627	// history_table positions, translated, and clamped to invalid pos
628	int32_t invalidPos = -`4` * LZFSE_ENCODE_MAX_D_VALUE;
629	for (int i = `0`; i < LZFSE_ENCODE_HASH_VALUES; i++) {
630	int32_t *p = &(s->history_table[i].pos[`0`]);
631	for (int j = `0`; j < LZFSE_ENCODE_HASH_WIDTH; j++) {
632	lzfse_offset newPos = p[j] - delta; // translate
633	p[j] = (int32_t)((newPos < invalidPos) ? invalidPos : newPos); // clamp
634	}
635	}
636
637	return LZFSE_STATUS_OK; // OK
638	}
639
640	// ===============================================================
641	// Encoder front end
642
643	int lzfse_encode_base(lzfse_encoder_state *s) {
644	lzfse_history_set *history_table = s->history_table;
645	lzfse_history_set *hashLine = `0`;
646	lzfse_history_set newH;
647	const lzfse_match NO_MATCH = {`0`};
648	int ok = `1`;
649
650	memset(&newH, `0x00`, sizeof(newH));
651
652	// 8 byte padding at end of buffer
653	s->src_encode_end = s->src_end - `8`;
654	for (; s->src_encode_i < s->src_encode_end; s->src_encode_i++) {
655	lzfse_offset pos = s->src_encode_i; // pos >= 0
656
657	// Load 4 byte value and get hash line
658	uint32_t x = load4(s->src + pos);
659	hashLine = history_table + hashX(x);
660	lzfse_history_set h = *hashLine;
661
662	// Prepare next hash line (component 0 is the most recent) to prepare new
663	// entries (stored later)
664	{
665	newH.pos[`0`] = (int32_t)pos;
666	for (int k = `0`; k < LZFSE_ENCODE_HASH_WIDTH - `1`; k++)
667	newH.pos[k + `1`] = h.pos[k];
668	newH.value[`0`] = x;
669	for (int k = `0`; k < LZFSE_ENCODE_HASH_WIDTH - `1`; k++)
670	newH.value[k + `1`] = h.value[k];
671	}
672
673	// Do not look for a match if we are still covered by a previous match
674	if (pos < s->src_literal)
675	goto END_POS;
676
677	// Search best incoming match
678	lzfse_match incoming = {.pos = pos, .ref = `0`, .length = `0`};
679
680	// Check for matches. We consider matches of length >= 4 only.
681	for (int k = `0`; k < LZFSE_ENCODE_HASH_WIDTH; k++) {
682	uint32_t d = h.value[k] ^ x;
683	if (d)
684	continue; // no 4 byte match
685	int32_t ref = h.pos[k];
686	if (ref + LZFSE_ENCODE_MAX_D_VALUE < pos)
687	continue; // too far
688
689	const uint8_t *src_ref = s->src + ref;
690	const uint8_t *src_pos = s->src + pos;
691	uint32_t length = `4`;
692	uint32_t maxLength =
693	(uint32_t)(s->src_end - pos - `8`); // ensure we don't hit the end of SRC
694	while (length < maxLength) {
695	uint64_t d = load8(src_ref + length) ^ load8(src_pos + length);
696	if (d == `0`) {
697	length += `8`;
698	continue;
699	}
700
701	length +=
702	(__builtin_ctzll(d) >> `3`); // ctzll must be called only with D != 0
703	break;
704	}
705	if (length > incoming.length) {
706	incoming.length = length;
707	incoming.ref = ref;
708	} // keep if longer
709	}
710
711	// No incoming match?
712	if (incoming.length == `0`) {
713	// We may still want to emit some literals here, to not lag too far behind
714	// the current search point, and avoid
715	// ending up with a literal block not fitting in the state.
716	lzfse_offset n_literals = pos - s->src_literal;
717	// The threshold here should be larger than a couple of MAX_L_VALUE, and
718	// much smaller than LITERALS_PER_BLOCK
719	if (n_literals > `8` * LZFSE_ENCODE_MAX_L_VALUE) {
720	// Here, we need to consume some literals. Emit pending match if there
721	// is one
722	if (s->pending.length > `0`) {
723	if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) {
724	ok = `0`;
725	goto END;
726	}
727	s->pending = NO_MATCH;
728	} else {
729	// No pending match, emit a full LZFSE_ENCODE_MAX_L_VALUE block of
730	// literals
731	if (lzfse_backend_literals(s, LZFSE_ENCODE_MAX_L_VALUE) !=
732	LZFSE_STATUS_OK) {
733	ok = `0`;
734	goto END;
735	}
736	}
737	}
738	goto END_POS; // no incoming match
739	}
740
741	// Limit match length (it may still be expanded backwards, but this is
742	// bounded by the limit on literals we tested before)
743	if (incoming.length > LZFSE_ENCODE_MAX_MATCH_LENGTH) {
744	incoming.length = LZFSE_ENCODE_MAX_MATCH_LENGTH;
745	}
746
747	// Expand backwards (since this is expensive, we do this for the best match
748	// only)
749	while (incoming.pos > s->src_literal && incoming.ref > `0` &&
750	s->src[incoming.ref - `1`] == s->src[incoming.pos - `1`]) {
751	incoming.pos--;
752	incoming.ref--;
753	}
754	incoming.length += pos - incoming.pos; // update length after expansion
755
756	// Match filtering heuristic (from LZVN). INCOMING is always defined here.
757
758	// Incoming is 'good', emit incoming
759	if (incoming.length >= LZFSE_ENCODE_GOOD_MATCH) {
760	if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) {
761	ok = `0`;
762	goto END;
763	}
764	s->pending = NO_MATCH;
765	goto END_POS;
766	}
767
768	// No pending, keep incoming
769	if (s->pending.length == `0`) {
770	s->pending = incoming;
771	goto END_POS;
772	}
773
774	// No overlap, emit pending, keep incoming
775	if (s->pending.pos + s->pending.length <= incoming.pos) {
776	if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) {
777	ok = `0`;
778	goto END;
779	}
780	s->pending = incoming;
781	goto END_POS;
782	}
783
784	// Overlap: emit longest
785	if (incoming.length > s->pending.length) {
786	if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) {
787	ok = `0`;
788	goto END;
789	}
790	} else {
791	if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) {
792	ok = `0`;
793	goto END;
794	}
795	}
796	s->pending = NO_MATCH;
797
798	END_POS:
799	// We are done with this src_encode_i.
800	// Update state now (s->pending has already been updated).
801	*hashLine = newH;
802	}
803
804	END:
805	return ok ? LZFSE_STATUS_OK : LZFSE_STATUS_DST_FULL;
806	}
807
808	int lzfse_encode_finish(lzfse_encoder_state *s) {
809	const lzfse_match NO_MATCH = {`0`};
810
811	// Emit pending match
812	if (s->pending.length > `0`) {
813	if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK)
814	return LZFSE_STATUS_DST_FULL;
815	s->pending = NO_MATCH;
816	}
817
818	// Emit final literals if any
819	lzfse_offset L = s->src_end - s->src_literal;
820	if (L > `0`) {
821	if (lzfse_backend_literals(s, L) != LZFSE_STATUS_OK)
822	return LZFSE_STATUS_DST_FULL;
823	}
824
825	// Emit all matches, and end-of-stream block
826	if (lzfse_backend_end_of_stream(s) != LZFSE_STATUS_OK)
827	return LZFSE_STATUS_DST_FULL;
828
829	return LZFSE_STATUS_OK;
830	}
831

Browse the source code of lzfse/src/lzfse_encode_base.c