lzvn_decode_base.c source code [lzfse/src/lzvn_decode_base.c]

1	/*
2	Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4	Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6	1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8	2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9	in the documentation and/or other materials provided with the distribution.
10
11	3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12	from this software without specific prior written permission.
13
14	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16	COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18	HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20	*/
21
22	// LZVN low-level decoder
23
24	#include "lzvn_decode_base.h"
25
26	// Both the source and destination buffers are represented by a pointer and
27	// a length; they are always* updated in concert using this macro; however*
28	// many bytes the pointer is advanced, the length is decremented by the same
29	// amount. Thus, pointer + length always points to the byte one past the end
30	// of the buffer.
31	#define PTR_LEN_INC(_pointer, _length, _increment) \
32	(_pointer += _increment, _length -= _increment)
33
34	// Update state with current positions and distance, corresponding to the
35	// beginning of an instruction in both streams
36	#define UPDATE_GOOD \
37	(state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
38
39	void lzvn_decode(lzvn_decoder_state *state) {
40	// Jump table for all instructions
41	static const void *opc_tbl[`256`] = {
42	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d,
43	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
44	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
45	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
46	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
47	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
48	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
49	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
50	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
51	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
52	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
53	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
54	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
55	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
56	&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
57	&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
58	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
59	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
60	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
61	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
62	&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
63	&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
64	&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
65	&&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
66	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
67	&&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
68	&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
69	&&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
70	&&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
71	&&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
72	&&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
73	&&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
74
75	size_t src_len = state->src_end - state->src;
76	size_t dst_len = state->dst_end - state->dst;
77	if (src_len == `0` \|\| dst_len == `0`)
78	return; // empty buffer
79
80	const unsigned char *src_ptr = state->src;
81	unsigned char *dst_ptr = state->dst;
82	size_t D = state->d_prev;
83	size_t M;
84	size_t L;
85	size_t opc_len;
86
87	// Do we have a partially expanded match saved in state?
88	if (state->L != `0` \|\| state->M != `0`) {
89	L = state->L;
90	M = state->M;
91	D = state->D;
92	opc_len = `0`; // we already skipped the op
93	state->L = state->M = state->D = `0`;
94	if (M == `0`)
95	goto copy_literal;
96	if (L == `0`)
97	goto copy_match;
98	goto copy_literal_and_match;
99	}
100
101	unsigned char opc = src_ptr[`0`];
102	goto *opc_tbl[opc];
103
104	// ===============================================================
105	// These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
106	// literal and a match. The bulk of their implementations are shared;
107	// each label here only does the work of setting the opcode length (not
108	// including any literal bytes), and extracting the literal length, match
109	// length, and match distance (except in pre_d). They then jump into the
110	// shared implementation to actually output the literal and match bytes.
111	//
112	// No error checking happens in the first stage, except for ensuring that
113	// the source has enough length to represent the full opcode before
114	// reading past the first byte.
115	sml_d:
116	UPDATE_GOOD;
117	// "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
118	// where the length of literal (0-3 bytes) is encoded by the high 2 bits of
119	// the first byte. We first extract the literal length so we know how long
120	// the opcode is, then check that the source can hold both this opcode and
121	// at least one byte of the next (because any valid input stream must be
122	// terminated with an eos token).
123	opc_len = `2`;
124	L = (size_t)extract(opc, `6`, `2`);
125	M = (size_t)extract(opc, `3`, `3`) + `3`;
126	// We need to ensure that the source buffer is long enough that we can
127	// safely read this entire opcode, the literal that follows, and the first
128	// byte of the next opcode. Once we satisfy this requirement, we can
129	// safely unpack the match distance. A check similar to this one is
130	// present in all the opcode implementations.
131	if (src_len <= opc_len + L)
132	return; // source truncated
133	D = (size_t)extract(opc, `0`, `3`) << `8` \| src_ptr[`1`];
134	goto copy_literal_and_match;
135
136	med_d:
137	UPDATE_GOOD;
138	// "medium distance": This is a minor variant of the "small distance"
139	// encoding, where we will now use two extra bytes instead of one to encode
140	// the restof the match length and distance. This allows an extra two bits
141	// for the match length, and an extra three bits for the match distance. The
142	// full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
143	opc_len = `3`;
144	L = (size_t)extract(opc, `3`, `2`);
145	if (src_len <= opc_len + L)
146	return; // source truncated
147	uint16_t opc23 = load2(&src_ptr[`1`]);
148	M = (size_t)((extract(opc, `0`, `3`) << `2` \| extract(opc23, `0`, `2`)) + `3`);
149	D = (size_t)extract(opc23, `2`, `14`);
150	goto copy_literal_and_match;
151
152	lrg_d:
153	UPDATE_GOOD;
154	// "large distance": This is another variant of the "small distance"
155	// encoding, where we will now use two extra bytes to encode the match
156	// distance, which allows distances up to 65535 to be represented. The full
157	// structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
158	opc_len = `3`;
159	L = (size_t)extract(opc, `6`, `2`);
160	M = (size_t)extract(opc, `3`, `3`) + `3`;
161	if (src_len <= opc_len + L)
162	return; // source truncated
163	D = load2(&src_ptr[`1`]);
164	goto copy_literal_and_match;
165
166	pre_d:
167	UPDATE_GOOD;
168	// "previous distance": This opcode has the structure LLMMM110, where the
169	// length of the literal (0-3 bytes) is encoded by the high 2 bits of the
170	// first byte. We first extract the literal length so we know how long
171	// the opcode is, then check that the source can hold both this opcode and
172	// at least one byte of the next (because any valid input stream must be
173	// terminated with an eos token).
174	opc_len = `1`;
175	L = (size_t)extract(opc, `6`, `2`);
176	M = (size_t)extract(opc, `3`, `3`) + `3`;
177	if (src_len <= opc_len + L)
178	return; // source truncated
179	goto copy_literal_and_match;
180
181	copy_literal_and_match:
182	// Common implementation of writing data for opcodes that have both a
183	// literal and a match. We begin by advancing the source pointer past
184	// the opcode, so that it points at the first literal byte (if L
185	// is non-zero; otherwise it points at the next opcode).
186	PTR_LEN_INC(src_ptr, src_len, opc_len);
187	// Now we copy the literal from the source pointer to the destination.
188	if (__builtin_expect(dst_len >= `4` && src_len >= `4`, `1`)) {
189	// The literal is 0-3 bytes; if we are not near the end of the buffer,
190	// we can safely just do a 4 byte copy (which is guaranteed to cover
191	// the complete literal, and may include some other bytes as well).
192	store4(dst_ptr, load4(src_ptr));
193	} else if (L <= dst_len) {
194	// We are too close to the end of either the input or output stream
195	// to be able to safely use a four-byte copy, but we will not exhaust
196	// either stream (we already know that the source will not be
197	// exhausted from checks in the individual opcode implementations,
198	// and we just tested that dst_len > L). Thus, we need to do a
199	// byte-by-byte copy of the literal. This is slow, but it can only ever
200	// happen near the very end of a buffer, so it is not an important case to
201	// optimize.
202	for (size_t i = `0`; i < L; ++i)
203	dst_ptr[i] = src_ptr[i];
204	} else {
205	// Destination truncated: fill DST, and store partial match
206
207	// Copy partial literal
208	for (size_t i = `0`; i < dst_len; ++i)
209	dst_ptr[i] = src_ptr[i];
210	// Save state
211	state->src = src_ptr + dst_len;
212	state->dst = dst_ptr + dst_len;
213	state->L = L - dst_len;
214	state->M = M;
215	state->D = D;
216	return; // destination truncated
217	}
218	// Having completed the copy of the literal, we advance both the source
219	// and destination pointers by the number of literal bytes.
220	PTR_LEN_INC(dst_ptr, dst_len, L);
221	PTR_LEN_INC(src_ptr, src_len, L);
222	// Check if the match distance is valid; matches must not reference
223	// bytes that preceed the start of the output buffer, nor can the match
224	// distance be zero.
225	if (D > dst_ptr - state->dst_begin \|\| D == `0`)
226	goto invalid_match_distance;
227	copy_match:
228	// Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
229	// in mind that we may have D < M, in which case the source and destination
230	// windows overlap in the copy. The semantics of the match copy are not
231	// those of memmove( ); if the buffers overlap it needs to behave as though
232	// we were copying byte-by-byte in increasing address order. If, for example,
233	// D is 1, the copy operation is equivalent to:
234	//
235	// memset(dst_ptr, dst_ptr[-1], M);
236	//
237	// i.e. it splats the previous byte. This means that we need to be very
238	// careful about using wide loads or stores to perform the copy operation.
239	if (__builtin_expect(dst_len >= M + `7` && D >= `8`, `1`)) {
240	// We are not near the end of the buffer, and the match distance
241	// is at least eight. Thus, we can safely loop using eight byte
242	// copies. The last of these may slop over the intended end of
243	// the match, but this is OK because we know we have a safety bound
244	// away from the end of the destination buffer.
245	for (size_t i = `0`; i < M; i += `8`)
246	store8(&dst_ptr[i], load8(&dst_ptr[i - D]));
247	} else if (M <= dst_len) {
248	// Either the match distance is too small, or we are too close to
249	// the end of the buffer to safely use eight byte copies. Fall back
250	// on a simple byte-by-byte implementation.
251	for (size_t i = `0`; i < M; ++i)
252	dst_ptr[i] = dst_ptr[i - D];
253	} else {
254	// Destination truncated: fill DST, and store partial match
255
256	// Copy partial match
257	for (size_t i = `0`; i < dst_len; ++i)
258	dst_ptr[i] = dst_ptr[i - D];
259	// Save state
260	state->src = src_ptr;
261	state->dst = dst_ptr + dst_len;
262	state->L = `0`;
263	state->M = M - dst_len;
264	state->D = D;
265	return; // destination truncated
266	}
267	// Update the destination pointer and length to account for the bytes
268	// written by the match, then load the next opcode byte and branch to
269	// the appropriate implementation.
270	PTR_LEN_INC(dst_ptr, dst_len, M);
271	opc = src_ptr[`0`];
272	goto *opc_tbl[opc];
273
274	// ===============================================================
275	// Opcodes representing only a match (no literal).
276	// These two opcodes (lrg_m and sml_m) encode only a match. The match
277	// distance is carried over from the previous opcode, so all they need
278	// to encode is the match length. We are able to reuse the match copy
279	// sequence from the literal and match opcodes to perform the actual
280	// copy implementation.
281	sml_m:
282	UPDATE_GOOD;
283	// "small match": This opcode has no literal, and uses the previous match
284	// distance (i.e. it encodes only the match length), in a single byte as
285	// 1111MMMM.
286	opc_len = `1`;
287	if (src_len <= opc_len)
288	return; // source truncated
289	M = (size_t)extract(opc, `0`, `4`);
290	PTR_LEN_INC(src_ptr, src_len, opc_len);
291	goto copy_match;
292
293	lrg_m:
294	UPDATE_GOOD;
295	// "large match": This opcode has no literal, and uses the previous match
296	// distance (i.e. it encodes only the match length). It is encoded in two
297	// bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
298	// be represented by sml_m, there is an implicit bias of 16 on the match
299	// length; the representable values are [16,271].
300	opc_len = `2`;
301	if (src_len <= opc_len)
302	return; // source truncated
303	M = src_ptr[`1`] + `16`;
304	PTR_LEN_INC(src_ptr, src_len, opc_len);
305	goto copy_match;
306
307	// ===============================================================
308	// Opcodes representing only a literal (no match).
309	// These two opcodes (lrg_l and sml_l) encode only a literal. There is no
310	// match length or match distance to worry about (but we need to not
311	// touch D, as it must be preserved between opcodes).
312	sml_l:
313	UPDATE_GOOD;
314	// "small literal": This opcode has no match, and encodes only a literal
315	// of length up to 15 bytes. The format is 1110LLLL LITERAL.
316	opc_len = `1`;
317	L = (size_t)extract(opc, `0`, `4`);
318	goto copy_literal;
319
320	lrg_l:
321	UPDATE_GOOD;
322	// "large literal": This opcode has no match, and uses the previous match
323	// distance (i.e. it encodes only the match length). It is encoded in two
324	// bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
325	// bytes can be represented by sml_l, there is an implicit bias of 16 on
326	// the literal length; the representable values are [16,271].
327	opc_len = `2`;
328	if (src_len <= `2`)
329	return; // source truncated
330	L = src_ptr[`1`] + `16`;
331	goto copy_literal;
332
333	copy_literal:
334	// Check that the source buffer is large enough to hold the complete
335	// literal and at least the first byte of the next opcode. If so, advance
336	// the source pointer to point to the first byte of the literal and adjust
337	// the source length accordingly.
338	if (src_len <= opc_len + L)
339	return; // source truncated
340	PTR_LEN_INC(src_ptr, src_len, opc_len);
341	// Now we copy the literal from the source pointer to the destination.
342	if (dst_len >= L + `7` && src_len >= L + `7`) {
343	// We are not near the end of the source or destination buffers; thus
344	// we can safely copy the literal using wide copies, without worrying
345	// about reading or writing past the end of either buffer.
346	for (size_t i = `0`; i < L; i += `8`)
347	store8(&dst_ptr[i], load8(&src_ptr[i]));
348	} else if (L <= dst_len) {
349	// We are too close to the end of either the input or output stream
350	// to be able to safely use an eight-byte copy. Instead we copy the
351	// literal byte-by-byte.
352	for (size_t i = `0`; i < L; ++i)
353	dst_ptr[i] = src_ptr[i];
354	} else {
355	// Destination truncated: fill DST, and store partial match
356
357	// Copy partial literal
358	for (size_t i = `0`; i < dst_len; ++i)
359	dst_ptr[i] = src_ptr[i];
360	// Save state
361	state->src = src_ptr + dst_len;
362	state->dst = dst_ptr + dst_len;
363	state->L = L - dst_len;
364	state->M = `0`;
365	state->D = D;
366	return; // destination truncated
367	}
368	// Having completed the copy of the literal, we advance both the source
369	// and destination pointers by the number of literal bytes.
370	PTR_LEN_INC(dst_ptr, dst_len, L);
371	PTR_LEN_INC(src_ptr, src_len, L);
372	// Load the first byte of the next opcode, and jump to its implementation.
373	opc = src_ptr[`0`];
374	goto *opc_tbl[opc];
375
376	// ===============================================================
377	// Other opcodes
378	nop:
379	UPDATE_GOOD;
380	opc_len = `1`;
381	if (src_len <= opc_len)
382	return; // source truncated
383	PTR_LEN_INC(src_ptr, src_len, opc_len);
384	opc = src_ptr[`0`];
385	goto *opc_tbl[opc];
386
387	eos:
388	opc_len = `8`;
389	if (src_len < opc_len)
390	return; // source truncated (here we don't need an extra byte for next op
391	// code)
392	PTR_LEN_INC(src_ptr, src_len, opc_len);
393	state->end_of_stream = `1`;
394	UPDATE_GOOD;
395	return; // end-of-stream
396
397	// ===============================================================
398	// Return on error
399	udef:
400	invalid_match_distance:
401
402	return; // we already updated state
403	}
404

Browse the source code of lzfse/src/lzvn_decode_base.c