1/*
2Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
61. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
82. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9 in the documentation and/or other materials provided with the distribution.
10
113. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12 from this software without specific prior written permission.
13
14THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20*/
21
22// LZVN low-level decoder
23
24#include "lzvn_decode_base.h"
25
26// Both the source and destination buffers are represented by a pointer and
27// a length; they are *always* updated in concert using this macro; however
28// many bytes the pointer is advanced, the length is decremented by the same
29// amount. Thus, pointer + length always points to the byte one past the end
30// of the buffer.
31#define PTR_LEN_INC(_pointer, _length, _increment) \
32 (_pointer += _increment, _length -= _increment)
33
34// Update state with current positions and distance, corresponding to the
35// beginning of an instruction in both streams
36#define UPDATE_GOOD \
37 (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
38
39void lzvn_decode(lzvn_decoder_state *state) {
40 // Jump table for all instructions
41 static const void *opc_tbl[256] = {
42 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d,
43 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
44 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d,
45 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
46 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
47 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
48 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
49 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d,
50 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
51 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
52 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
53 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
54 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
55 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
56 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
57 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
58 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
59 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
60 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
61 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
62 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
63 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
64 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
65 &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
66 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
67 &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
68 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
69 &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef,
70 &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
71 &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
72 &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
73 &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
74
75 size_t src_len = state->src_end - state->src;
76 size_t dst_len = state->dst_end - state->dst;
77 if (src_len == 0 || dst_len == 0)
78 return; // empty buffer
79
80 const unsigned char *src_ptr = state->src;
81 unsigned char *dst_ptr = state->dst;
82 size_t D = state->d_prev;
83 size_t M;
84 size_t L;
85 size_t opc_len;
86
87 // Do we have a partially expanded match saved in state?
88 if (state->L != 0 || state->M != 0) {
89 L = state->L;
90 M = state->M;
91 D = state->D;
92 opc_len = 0; // we already skipped the op
93 state->L = state->M = state->D = 0;
94 if (M == 0)
95 goto copy_literal;
96 if (L == 0)
97 goto copy_match;
98 goto copy_literal_and_match;
99 }
100
101 unsigned char opc = src_ptr[0];
102 goto *opc_tbl[opc];
103
104// ===============================================================
105// These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
106// literal and a match. The bulk of their implementations are shared;
107// each label here only does the work of setting the opcode length (not
108// including any literal bytes), and extracting the literal length, match
109// length, and match distance (except in pre_d). They then jump into the
110// shared implementation to actually output the literal and match bytes.
111//
112// No error checking happens in the first stage, except for ensuring that
113// the source has enough length to represent the full opcode before
114// reading past the first byte.
115sml_d:
116 UPDATE_GOOD;
117 // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
118 // where the length of literal (0-3 bytes) is encoded by the high 2 bits of
119 // the first byte. We first extract the literal length so we know how long
120 // the opcode is, then check that the source can hold both this opcode and
121 // at least one byte of the next (because any valid input stream must be
122 // terminated with an eos token).
123 opc_len = 2;
124 L = (size_t)extract(opc, 6, 2);
125 M = (size_t)extract(opc, 3, 3) + 3;
126 // We need to ensure that the source buffer is long enough that we can
127 // safely read this entire opcode, the literal that follows, and the first
128 // byte of the next opcode. Once we satisfy this requirement, we can
129 // safely unpack the match distance. A check similar to this one is
130 // present in all the opcode implementations.
131 if (src_len <= opc_len + L)
132 return; // source truncated
133 D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1];
134 goto copy_literal_and_match;
135
136med_d:
137 UPDATE_GOOD;
138 // "medium distance": This is a minor variant of the "small distance"
139 // encoding, where we will now use two extra bytes instead of one to encode
140 // the restof the match length and distance. This allows an extra two bits
141 // for the match length, and an extra three bits for the match distance. The
142 // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
143 opc_len = 3;
144 L = (size_t)extract(opc, 3, 2);
145 if (src_len <= opc_len + L)
146 return; // source truncated
147 uint16_t opc23 = load2(&src_ptr[1]);
148 M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3);
149 D = (size_t)extract(opc23, 2, 14);
150 goto copy_literal_and_match;
151
152lrg_d:
153 UPDATE_GOOD;
154 // "large distance": This is another variant of the "small distance"
155 // encoding, where we will now use two extra bytes to encode the match
156 // distance, which allows distances up to 65535 to be represented. The full
157 // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
158 opc_len = 3;
159 L = (size_t)extract(opc, 6, 2);
160 M = (size_t)extract(opc, 3, 3) + 3;
161 if (src_len <= opc_len + L)
162 return; // source truncated
163 D = load2(&src_ptr[1]);
164 goto copy_literal_and_match;
165
166pre_d:
167 UPDATE_GOOD;
168 // "previous distance": This opcode has the structure LLMMM110, where the
169 // length of the literal (0-3 bytes) is encoded by the high 2 bits of the
170 // first byte. We first extract the literal length so we know how long
171 // the opcode is, then check that the source can hold both this opcode and
172 // at least one byte of the next (because any valid input stream must be
173 // terminated with an eos token).
174 opc_len = 1;
175 L = (size_t)extract(opc, 6, 2);
176 M = (size_t)extract(opc, 3, 3) + 3;
177 if (src_len <= opc_len + L)
178 return; // source truncated
179 goto copy_literal_and_match;
180
181copy_literal_and_match:
182 // Common implementation of writing data for opcodes that have both a
183 // literal and a match. We begin by advancing the source pointer past
184 // the opcode, so that it points at the first literal byte (if L
185 // is non-zero; otherwise it points at the next opcode).
186 PTR_LEN_INC(src_ptr, src_len, opc_len);
187 // Now we copy the literal from the source pointer to the destination.
188 if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) {
189 // The literal is 0-3 bytes; if we are not near the end of the buffer,
190 // we can safely just do a 4 byte copy (which is guaranteed to cover
191 // the complete literal, and may include some other bytes as well).
192 store4(dst_ptr, load4(src_ptr));
193 } else if (L <= dst_len) {
194 // We are too close to the end of either the input or output stream
195 // to be able to safely use a four-byte copy, but we will not exhaust
196 // either stream (we already know that the source will not be
197 // exhausted from checks in the individual opcode implementations,
198 // and we just tested that dst_len > L). Thus, we need to do a
199 // byte-by-byte copy of the literal. This is slow, but it can only ever
200 // happen near the very end of a buffer, so it is not an important case to
201 // optimize.
202 for (size_t i = 0; i < L; ++i)
203 dst_ptr[i] = src_ptr[i];
204 } else {
205 // Destination truncated: fill DST, and store partial match
206
207 // Copy partial literal
208 for (size_t i = 0; i < dst_len; ++i)
209 dst_ptr[i] = src_ptr[i];
210 // Save state
211 state->src = src_ptr + dst_len;
212 state->dst = dst_ptr + dst_len;
213 state->L = L - dst_len;
214 state->M = M;
215 state->D = D;
216 return; // destination truncated
217 }
218 // Having completed the copy of the literal, we advance both the source
219 // and destination pointers by the number of literal bytes.
220 PTR_LEN_INC(dst_ptr, dst_len, L);
221 PTR_LEN_INC(src_ptr, src_len, L);
222 // Check if the match distance is valid; matches must not reference
223 // bytes that preceed the start of the output buffer, nor can the match
224 // distance be zero.
225 if (D > dst_ptr - state->dst_begin || D == 0)
226 goto invalid_match_distance;
227copy_match:
228 // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
229 // in mind that we may have D < M, in which case the source and destination
230 // windows overlap in the copy. The semantics of the match copy are *not*
231 // those of memmove( ); if the buffers overlap it needs to behave as though
232 // we were copying byte-by-byte in increasing address order. If, for example,
233 // D is 1, the copy operation is equivalent to:
234 //
235 // memset(dst_ptr, dst_ptr[-1], M);
236 //
237 // i.e. it splats the previous byte. This means that we need to be very
238 // careful about using wide loads or stores to perform the copy operation.
239 if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) {
240 // We are not near the end of the buffer, and the match distance
241 // is at least eight. Thus, we can safely loop using eight byte
242 // copies. The last of these may slop over the intended end of
243 // the match, but this is OK because we know we have a safety bound
244 // away from the end of the destination buffer.
245 for (size_t i = 0; i < M; i += 8)
246 store8(&dst_ptr[i], load8(&dst_ptr[i - D]));
247 } else if (M <= dst_len) {
248 // Either the match distance is too small, or we are too close to
249 // the end of the buffer to safely use eight byte copies. Fall back
250 // on a simple byte-by-byte implementation.
251 for (size_t i = 0; i < M; ++i)
252 dst_ptr[i] = dst_ptr[i - D];
253 } else {
254 // Destination truncated: fill DST, and store partial match
255
256 // Copy partial match
257 for (size_t i = 0; i < dst_len; ++i)
258 dst_ptr[i] = dst_ptr[i - D];
259 // Save state
260 state->src = src_ptr;
261 state->dst = dst_ptr + dst_len;
262 state->L = 0;
263 state->M = M - dst_len;
264 state->D = D;
265 return; // destination truncated
266 }
267 // Update the destination pointer and length to account for the bytes
268 // written by the match, then load the next opcode byte and branch to
269 // the appropriate implementation.
270 PTR_LEN_INC(dst_ptr, dst_len, M);
271 opc = src_ptr[0];
272 goto *opc_tbl[opc];
273
274// ===============================================================
275// Opcodes representing only a match (no literal).
276// These two opcodes (lrg_m and sml_m) encode only a match. The match
277// distance is carried over from the previous opcode, so all they need
278// to encode is the match length. We are able to reuse the match copy
279// sequence from the literal and match opcodes to perform the actual
280// copy implementation.
281sml_m:
282 UPDATE_GOOD;
283 // "small match": This opcode has no literal, and uses the previous match
284 // distance (i.e. it encodes only the match length), in a single byte as
285 // 1111MMMM.
286 opc_len = 1;
287 if (src_len <= opc_len)
288 return; // source truncated
289 M = (size_t)extract(opc, 0, 4);
290 PTR_LEN_INC(src_ptr, src_len, opc_len);
291 goto copy_match;
292
293lrg_m:
294 UPDATE_GOOD;
295 // "large match": This opcode has no literal, and uses the previous match
296 // distance (i.e. it encodes only the match length). It is encoded in two
297 // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
298 // be represented by sml_m, there is an implicit bias of 16 on the match
299 // length; the representable values are [16,271].
300 opc_len = 2;
301 if (src_len <= opc_len)
302 return; // source truncated
303 M = src_ptr[1] + 16;
304 PTR_LEN_INC(src_ptr, src_len, opc_len);
305 goto copy_match;
306
307// ===============================================================
308// Opcodes representing only a literal (no match).
309// These two opcodes (lrg_l and sml_l) encode only a literal. There is no
310// match length or match distance to worry about (but we need to *not*
311// touch D, as it must be preserved between opcodes).
312sml_l:
313 UPDATE_GOOD;
314 // "small literal": This opcode has no match, and encodes only a literal
315 // of length up to 15 bytes. The format is 1110LLLL LITERAL.
316 opc_len = 1;
317 L = (size_t)extract(opc, 0, 4);
318 goto copy_literal;
319
320lrg_l:
321 UPDATE_GOOD;
322 // "large literal": This opcode has no match, and uses the previous match
323 // distance (i.e. it encodes only the match length). It is encoded in two
324 // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16
325 // bytes can be represented by sml_l, there is an implicit bias of 16 on
326 // the literal length; the representable values are [16,271].
327 opc_len = 2;
328 if (src_len <= 2)
329 return; // source truncated
330 L = src_ptr[1] + 16;
331 goto copy_literal;
332
333copy_literal:
334 // Check that the source buffer is large enough to hold the complete
335 // literal and at least the first byte of the next opcode. If so, advance
336 // the source pointer to point to the first byte of the literal and adjust
337 // the source length accordingly.
338 if (src_len <= opc_len + L)
339 return; // source truncated
340 PTR_LEN_INC(src_ptr, src_len, opc_len);
341 // Now we copy the literal from the source pointer to the destination.
342 if (dst_len >= L + 7 && src_len >= L + 7) {
343 // We are not near the end of the source or destination buffers; thus
344 // we can safely copy the literal using wide copies, without worrying
345 // about reading or writing past the end of either buffer.
346 for (size_t i = 0; i < L; i += 8)
347 store8(&dst_ptr[i], load8(&src_ptr[i]));
348 } else if (L <= dst_len) {
349 // We are too close to the end of either the input or output stream
350 // to be able to safely use an eight-byte copy. Instead we copy the
351 // literal byte-by-byte.
352 for (size_t i = 0; i < L; ++i)
353 dst_ptr[i] = src_ptr[i];
354 } else {
355 // Destination truncated: fill DST, and store partial match
356
357 // Copy partial literal
358 for (size_t i = 0; i < dst_len; ++i)
359 dst_ptr[i] = src_ptr[i];
360 // Save state
361 state->src = src_ptr + dst_len;
362 state->dst = dst_ptr + dst_len;
363 state->L = L - dst_len;
364 state->M = 0;
365 state->D = D;
366 return; // destination truncated
367 }
368 // Having completed the copy of the literal, we advance both the source
369 // and destination pointers by the number of literal bytes.
370 PTR_LEN_INC(dst_ptr, dst_len, L);
371 PTR_LEN_INC(src_ptr, src_len, L);
372 // Load the first byte of the next opcode, and jump to its implementation.
373 opc = src_ptr[0];
374 goto *opc_tbl[opc];
375
376// ===============================================================
377// Other opcodes
378nop:
379 UPDATE_GOOD;
380 opc_len = 1;
381 if (src_len <= opc_len)
382 return; // source truncated
383 PTR_LEN_INC(src_ptr, src_len, opc_len);
384 opc = src_ptr[0];
385 goto *opc_tbl[opc];
386
387eos:
388 opc_len = 8;
389 if (src_len < opc_len)
390 return; // source truncated (here we don't need an extra byte for next op
391 // code)
392 PTR_LEN_INC(src_ptr, src_len, opc_len);
393 state->end_of_stream = 1;
394 UPDATE_GOOD;
395 return; // end-of-stream
396
397// ===============================================================
398// Return on error
399udef:
400invalid_match_distance:
401
402 return; // we already updated state
403}
404