1 | /* |
2 | Copyright (c) 2015-2016, Apple Inc. All rights reserved. |
3 | |
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: |
5 | |
6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. |
7 | |
8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer |
9 | in the documentation and/or other materials provided with the distribution. |
10 | |
11 | 3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived |
12 | from this software without specific prior written permission. |
13 | |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
15 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
16 | COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
17 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
18 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
19 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
20 | */ |
21 | |
22 | // LZVN low-level decoder |
23 | |
24 | #include "lzvn_decode_base.h" |
25 | |
26 | // Both the source and destination buffers are represented by a pointer and |
27 | // a length; they are *always* updated in concert using this macro; however |
28 | // many bytes the pointer is advanced, the length is decremented by the same |
29 | // amount. Thus, pointer + length always points to the byte one past the end |
30 | // of the buffer. |
31 | #define PTR_LEN_INC(_pointer, _length, _increment) \ |
32 | (_pointer += _increment, _length -= _increment) |
33 | |
34 | // Update state with current positions and distance, corresponding to the |
35 | // beginning of an instruction in both streams |
36 | #define UPDATE_GOOD \ |
37 | (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D) |
38 | |
39 | void lzvn_decode(lzvn_decoder_state *state) { |
40 | // Jump table for all instructions |
41 | static const void *opc_tbl[256] = { |
42 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d, |
43 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, |
44 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, |
45 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, |
46 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, |
47 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, |
48 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, |
49 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, |
50 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
51 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
52 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
53 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
54 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
55 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
56 | &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, |
57 | &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, |
58 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
59 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
60 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
61 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
62 | &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, |
63 | &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, |
64 | &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, |
65 | &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, |
66 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
67 | &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, |
68 | &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, |
69 | &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, |
70 | &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, |
71 | &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, |
72 | &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, |
73 | &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m}; |
74 | |
75 | size_t src_len = state->src_end - state->src; |
76 | size_t dst_len = state->dst_end - state->dst; |
77 | if (src_len == 0 || dst_len == 0) |
78 | return; // empty buffer |
79 | |
80 | const unsigned char *src_ptr = state->src; |
81 | unsigned char *dst_ptr = state->dst; |
82 | size_t D = state->d_prev; |
83 | size_t M; |
84 | size_t L; |
85 | size_t opc_len; |
86 | |
87 | // Do we have a partially expanded match saved in state? |
88 | if (state->L != 0 || state->M != 0) { |
89 | L = state->L; |
90 | M = state->M; |
91 | D = state->D; |
92 | opc_len = 0; // we already skipped the op |
93 | state->L = state->M = state->D = 0; |
94 | if (M == 0) |
95 | goto copy_literal; |
96 | if (L == 0) |
97 | goto copy_match; |
98 | goto copy_literal_and_match; |
99 | } |
100 | |
101 | unsigned char opc = src_ptr[0]; |
102 | goto *opc_tbl[opc]; |
103 | |
104 | // =============================================================== |
105 | // These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a |
106 | // literal and a match. The bulk of their implementations are shared; |
107 | // each label here only does the work of setting the opcode length (not |
108 | // including any literal bytes), and extracting the literal length, match |
109 | // length, and match distance (except in pre_d). They then jump into the |
110 | // shared implementation to actually output the literal and match bytes. |
111 | // |
112 | // No error checking happens in the first stage, except for ensuring that |
113 | // the source has enough length to represent the full opcode before |
114 | // reading past the first byte. |
115 | sml_d: |
116 | UPDATE_GOOD; |
117 | // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL |
118 | // where the length of literal (0-3 bytes) is encoded by the high 2 bits of |
119 | // the first byte. We first extract the literal length so we know how long |
120 | // the opcode is, then check that the source can hold both this opcode and |
121 | // at least one byte of the next (because any valid input stream must be |
122 | // terminated with an eos token). |
123 | opc_len = 2; |
124 | L = (size_t)extract(opc, 6, 2); |
125 | M = (size_t)extract(opc, 3, 3) + 3; |
126 | // We need to ensure that the source buffer is long enough that we can |
127 | // safely read this entire opcode, the literal that follows, and the first |
128 | // byte of the next opcode. Once we satisfy this requirement, we can |
129 | // safely unpack the match distance. A check similar to this one is |
130 | // present in all the opcode implementations. |
131 | if (src_len <= opc_len + L) |
132 | return; // source truncated |
133 | D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1]; |
134 | goto copy_literal_and_match; |
135 | |
136 | med_d: |
137 | UPDATE_GOOD; |
138 | // "medium distance": This is a minor variant of the "small distance" |
139 | // encoding, where we will now use two extra bytes instead of one to encode |
140 | // the restof the match length and distance. This allows an extra two bits |
141 | // for the match length, and an extra three bits for the match distance. The |
142 | // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL. |
143 | opc_len = 3; |
144 | L = (size_t)extract(opc, 3, 2); |
145 | if (src_len <= opc_len + L) |
146 | return; // source truncated |
147 | uint16_t opc23 = load2(&src_ptr[1]); |
148 | M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3); |
149 | D = (size_t)extract(opc23, 2, 14); |
150 | goto copy_literal_and_match; |
151 | |
152 | lrg_d: |
153 | UPDATE_GOOD; |
154 | // "large distance": This is another variant of the "small distance" |
155 | // encoding, where we will now use two extra bytes to encode the match |
156 | // distance, which allows distances up to 65535 to be represented. The full |
157 | // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL. |
158 | opc_len = 3; |
159 | L = (size_t)extract(opc, 6, 2); |
160 | M = (size_t)extract(opc, 3, 3) + 3; |
161 | if (src_len <= opc_len + L) |
162 | return; // source truncated |
163 | D = load2(&src_ptr[1]); |
164 | goto copy_literal_and_match; |
165 | |
166 | pre_d: |
167 | UPDATE_GOOD; |
168 | // "previous distance": This opcode has the structure LLMMM110, where the |
169 | // length of the literal (0-3 bytes) is encoded by the high 2 bits of the |
170 | // first byte. We first extract the literal length so we know how long |
171 | // the opcode is, then check that the source can hold both this opcode and |
172 | // at least one byte of the next (because any valid input stream must be |
173 | // terminated with an eos token). |
174 | opc_len = 1; |
175 | L = (size_t)extract(opc, 6, 2); |
176 | M = (size_t)extract(opc, 3, 3) + 3; |
177 | if (src_len <= opc_len + L) |
178 | return; // source truncated |
179 | goto copy_literal_and_match; |
180 | |
181 | copy_literal_and_match: |
182 | // Common implementation of writing data for opcodes that have both a |
183 | // literal and a match. We begin by advancing the source pointer past |
184 | // the opcode, so that it points at the first literal byte (if L |
185 | // is non-zero; otherwise it points at the next opcode). |
186 | PTR_LEN_INC(src_ptr, src_len, opc_len); |
187 | // Now we copy the literal from the source pointer to the destination. |
188 | if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) { |
189 | // The literal is 0-3 bytes; if we are not near the end of the buffer, |
190 | // we can safely just do a 4 byte copy (which is guaranteed to cover |
191 | // the complete literal, and may include some other bytes as well). |
192 | store4(dst_ptr, load4(src_ptr)); |
193 | } else if (L <= dst_len) { |
194 | // We are too close to the end of either the input or output stream |
195 | // to be able to safely use a four-byte copy, but we will not exhaust |
196 | // either stream (we already know that the source will not be |
197 | // exhausted from checks in the individual opcode implementations, |
198 | // and we just tested that dst_len > L). Thus, we need to do a |
199 | // byte-by-byte copy of the literal. This is slow, but it can only ever |
200 | // happen near the very end of a buffer, so it is not an important case to |
201 | // optimize. |
202 | for (size_t i = 0; i < L; ++i) |
203 | dst_ptr[i] = src_ptr[i]; |
204 | } else { |
205 | // Destination truncated: fill DST, and store partial match |
206 | |
207 | // Copy partial literal |
208 | for (size_t i = 0; i < dst_len; ++i) |
209 | dst_ptr[i] = src_ptr[i]; |
210 | // Save state |
211 | state->src = src_ptr + dst_len; |
212 | state->dst = dst_ptr + dst_len; |
213 | state->L = L - dst_len; |
214 | state->M = M; |
215 | state->D = D; |
216 | return; // destination truncated |
217 | } |
218 | // Having completed the copy of the literal, we advance both the source |
219 | // and destination pointers by the number of literal bytes. |
220 | PTR_LEN_INC(dst_ptr, dst_len, L); |
221 | PTR_LEN_INC(src_ptr, src_len, L); |
222 | // Check if the match distance is valid; matches must not reference |
223 | // bytes that preceed the start of the output buffer, nor can the match |
224 | // distance be zero. |
225 | if (D > dst_ptr - state->dst_begin || D == 0) |
226 | goto invalid_match_distance; |
227 | copy_match: |
228 | // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep |
229 | // in mind that we may have D < M, in which case the source and destination |
230 | // windows overlap in the copy. The semantics of the match copy are *not* |
231 | // those of memmove( ); if the buffers overlap it needs to behave as though |
232 | // we were copying byte-by-byte in increasing address order. If, for example, |
233 | // D is 1, the copy operation is equivalent to: |
234 | // |
235 | // memset(dst_ptr, dst_ptr[-1], M); |
236 | // |
237 | // i.e. it splats the previous byte. This means that we need to be very |
238 | // careful about using wide loads or stores to perform the copy operation. |
239 | if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) { |
240 | // We are not near the end of the buffer, and the match distance |
241 | // is at least eight. Thus, we can safely loop using eight byte |
242 | // copies. The last of these may slop over the intended end of |
243 | // the match, but this is OK because we know we have a safety bound |
244 | // away from the end of the destination buffer. |
245 | for (size_t i = 0; i < M; i += 8) |
246 | store8(&dst_ptr[i], load8(&dst_ptr[i - D])); |
247 | } else if (M <= dst_len) { |
248 | // Either the match distance is too small, or we are too close to |
249 | // the end of the buffer to safely use eight byte copies. Fall back |
250 | // on a simple byte-by-byte implementation. |
251 | for (size_t i = 0; i < M; ++i) |
252 | dst_ptr[i] = dst_ptr[i - D]; |
253 | } else { |
254 | // Destination truncated: fill DST, and store partial match |
255 | |
256 | // Copy partial match |
257 | for (size_t i = 0; i < dst_len; ++i) |
258 | dst_ptr[i] = dst_ptr[i - D]; |
259 | // Save state |
260 | state->src = src_ptr; |
261 | state->dst = dst_ptr + dst_len; |
262 | state->L = 0; |
263 | state->M = M - dst_len; |
264 | state->D = D; |
265 | return; // destination truncated |
266 | } |
267 | // Update the destination pointer and length to account for the bytes |
268 | // written by the match, then load the next opcode byte and branch to |
269 | // the appropriate implementation. |
270 | PTR_LEN_INC(dst_ptr, dst_len, M); |
271 | opc = src_ptr[0]; |
272 | goto *opc_tbl[opc]; |
273 | |
274 | // =============================================================== |
275 | // Opcodes representing only a match (no literal). |
276 | // These two opcodes (lrg_m and sml_m) encode only a match. The match |
277 | // distance is carried over from the previous opcode, so all they need |
278 | // to encode is the match length. We are able to reuse the match copy |
279 | // sequence from the literal and match opcodes to perform the actual |
280 | // copy implementation. |
281 | sml_m: |
282 | UPDATE_GOOD; |
283 | // "small match": This opcode has no literal, and uses the previous match |
284 | // distance (i.e. it encodes only the match length), in a single byte as |
285 | // 1111MMMM. |
286 | opc_len = 1; |
287 | if (src_len <= opc_len) |
288 | return; // source truncated |
289 | M = (size_t)extract(opc, 0, 4); |
290 | PTR_LEN_INC(src_ptr, src_len, opc_len); |
291 | goto copy_match; |
292 | |
293 | lrg_m: |
294 | UPDATE_GOOD; |
295 | // "large match": This opcode has no literal, and uses the previous match |
296 | // distance (i.e. it encodes only the match length). It is encoded in two |
297 | // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can |
298 | // be represented by sml_m, there is an implicit bias of 16 on the match |
299 | // length; the representable values are [16,271]. |
300 | opc_len = 2; |
301 | if (src_len <= opc_len) |
302 | return; // source truncated |
303 | M = src_ptr[1] + 16; |
304 | PTR_LEN_INC(src_ptr, src_len, opc_len); |
305 | goto copy_match; |
306 | |
307 | // =============================================================== |
308 | // Opcodes representing only a literal (no match). |
309 | // These two opcodes (lrg_l and sml_l) encode only a literal. There is no |
310 | // match length or match distance to worry about (but we need to *not* |
311 | // touch D, as it must be preserved between opcodes). |
312 | sml_l: |
313 | UPDATE_GOOD; |
314 | // "small literal": This opcode has no match, and encodes only a literal |
315 | // of length up to 15 bytes. The format is 1110LLLL LITERAL. |
316 | opc_len = 1; |
317 | L = (size_t)extract(opc, 0, 4); |
318 | goto copy_literal; |
319 | |
320 | lrg_l: |
321 | UPDATE_GOOD; |
322 | // "large literal": This opcode has no match, and uses the previous match |
323 | // distance (i.e. it encodes only the match length). It is encoded in two |
324 | // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16 |
325 | // bytes can be represented by sml_l, there is an implicit bias of 16 on |
326 | // the literal length; the representable values are [16,271]. |
327 | opc_len = 2; |
328 | if (src_len <= 2) |
329 | return; // source truncated |
330 | L = src_ptr[1] + 16; |
331 | goto copy_literal; |
332 | |
333 | copy_literal: |
334 | // Check that the source buffer is large enough to hold the complete |
335 | // literal and at least the first byte of the next opcode. If so, advance |
336 | // the source pointer to point to the first byte of the literal and adjust |
337 | // the source length accordingly. |
338 | if (src_len <= opc_len + L) |
339 | return; // source truncated |
340 | PTR_LEN_INC(src_ptr, src_len, opc_len); |
341 | // Now we copy the literal from the source pointer to the destination. |
342 | if (dst_len >= L + 7 && src_len >= L + 7) { |
343 | // We are not near the end of the source or destination buffers; thus |
344 | // we can safely copy the literal using wide copies, without worrying |
345 | // about reading or writing past the end of either buffer. |
346 | for (size_t i = 0; i < L; i += 8) |
347 | store8(&dst_ptr[i], load8(&src_ptr[i])); |
348 | } else if (L <= dst_len) { |
349 | // We are too close to the end of either the input or output stream |
350 | // to be able to safely use an eight-byte copy. Instead we copy the |
351 | // literal byte-by-byte. |
352 | for (size_t i = 0; i < L; ++i) |
353 | dst_ptr[i] = src_ptr[i]; |
354 | } else { |
355 | // Destination truncated: fill DST, and store partial match |
356 | |
357 | // Copy partial literal |
358 | for (size_t i = 0; i < dst_len; ++i) |
359 | dst_ptr[i] = src_ptr[i]; |
360 | // Save state |
361 | state->src = src_ptr + dst_len; |
362 | state->dst = dst_ptr + dst_len; |
363 | state->L = L - dst_len; |
364 | state->M = 0; |
365 | state->D = D; |
366 | return; // destination truncated |
367 | } |
368 | // Having completed the copy of the literal, we advance both the source |
369 | // and destination pointers by the number of literal bytes. |
370 | PTR_LEN_INC(dst_ptr, dst_len, L); |
371 | PTR_LEN_INC(src_ptr, src_len, L); |
372 | // Load the first byte of the next opcode, and jump to its implementation. |
373 | opc = src_ptr[0]; |
374 | goto *opc_tbl[opc]; |
375 | |
376 | // =============================================================== |
377 | // Other opcodes |
378 | nop: |
379 | UPDATE_GOOD; |
380 | opc_len = 1; |
381 | if (src_len <= opc_len) |
382 | return; // source truncated |
383 | PTR_LEN_INC(src_ptr, src_len, opc_len); |
384 | opc = src_ptr[0]; |
385 | goto *opc_tbl[opc]; |
386 | |
387 | eos: |
388 | opc_len = 8; |
389 | if (src_len < opc_len) |
390 | return; // source truncated (here we don't need an extra byte for next op |
391 | // code) |
392 | PTR_LEN_INC(src_ptr, src_len, opc_len); |
393 | state->end_of_stream = 1; |
394 | UPDATE_GOOD; |
395 | return; // end-of-stream |
396 | |
397 | // =============================================================== |
398 | // Return on error |
399 | udef: |
400 | invalid_match_distance: |
401 | |
402 | return; // we already updated state |
403 | } |
404 | |