/*
Copyright (c) 2015-2016, Apple Inc. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// LZVN low-level encoder

#include "lzvn_encode_base.h"

// ===============================================================
// Coarse/fine copy, non-overlapping buffers

/*! @abstract Copy at least \p nbytes bytes from \p src to \p dst, by blocks
 * of 8 bytes (may go beyond range). No overlap.
 * @return \p dst + \p nbytes. */
static inline unsigned char *lzvn_copy64(unsigned char *restrict dst,
                                         const unsigned char *restrict src,
                                         size_t nbytes) {
  for (size_t i = 0; i < nbytes; i += 8)
    store8(dst + i, load8(src + i));
  return dst + nbytes;
}

/*! @abstract Copy exactly \p nbytes bytes from \p src to \p dst (respects range).
 * No overlap.
 * @return \p dst + \p nbytes. */
static inline unsigned char *lzvn_copy8(unsigned char *restrict dst,
                                        const unsigned char *restrict src,
                                        size_t nbytes) {
  for (size_t i = 0; i < nbytes; i++)
    dst[i] = src[i];
  return dst + nbytes;
}

/*! @abstract Emit (L,0,0) instructions (final literal).
 * We read at most \p L bytes from \p p.
 * @param p input stream
 * @param q1 the first byte after the output buffer.
 * @return pointer to the next output, <= \p q1.
 * @return \p q1 if output is full. In that case, output will be partially invalid.
 */
static inline unsigned char *emit_literal(const unsigned char *p,
                                          unsigned char *q, unsigned char *q1,
                                          size_t L) {
  size_t x;
  while (L > 15) {
    x = L < 271 ? L : 271;
    if (q + x + 10 >= q1)
      goto OUT_FULL;
    *(uint16_t *)q = 0xE0 + ((x - 16) << 8);
    q += 2; // non-aligned access OK
    L -= x;
    q = lzvn_copy8(q, p, x);
    p += x;
  }
  if (L > 0) {
    if (q + L + 10 >= q1)
      goto OUT_FULL;
    *q++ = 0xE0 + L; // 1110LLLL
    q = lzvn_copy8(q, p, L);
  }
  return q;

OUT_FULL:
  return q1;
}
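
// Worked example (read off the loop above, assuming the little-endian byte
// order the unaligned 16-bit store relies on): a final literal of L = 300
// bytes is emitted as two packets: bytes 0xE0 0xFF followed by 271 literal
// bytes (x = 271), then bytes 0xE0 0x0D followed by the remaining 29 literal
// bytes (29 - 16 = 0x0D).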

/*! @abstract Emit (L,M,D) instructions. M>=3.
 * @param p input stream pointing to the beginning of the literal. We read at
 * most \p L+4 bytes from \p p.
 * @param q1 the first byte after the output buffer.
 * @return pointer to the next output, <= \p q1.
 * @return \p q1 if output is full. In that case, output will be partially invalid.
 */
static inline unsigned char *emit(const unsigned char *p, unsigned char *q,
                                  unsigned char *q1, size_t L, size_t M,
                                  size_t D, size_t D_prev) {
  size_t x;
  while (L > 15) {
    x = L < 271 ? L : 271;
    if (q + x + 10 >= q1)
      goto OUT_FULL;
    *(uint16_t *)q = 0xE0 + ((x - 16) << 8);
    q += 2; // non-aligned access OK
    L -= x;
    q = lzvn_copy64(q, p, x);
    p += x;
  }
  if (L > 3) {
    if (q + L + 10 >= q1)
      goto OUT_FULL;
    *q++ = 0xE0 + L; // 1110LLLL
    q = lzvn_copy64(q, p, L);
    p += L;
    L = 0;
  }
  x = M <= 10 - 2 * L ? M : 10 - 2 * L; // x = min(10-2*L,M)
  M -= x;
  x -= 3; // M = (x+3) + M'; max value for x is 7-2*L

  // Fewer than 4 literals (L < 4) remain at this point; read them now
  uint32_t literal =
      *(uint32_t *)p; // read 4 literal bytes, non-aligned access OK
  // p is not accessed after this point

  // Relaxed capacity test covering all cases
  if (q + 8 >= q1)
    goto OUT_FULL;
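
  // Each branch below writes at most one opcode byte, two distance bytes, and
  // the unconditional 4-byte store of the literal word, i.e. at most 7 bytes,
  // so the 8-byte margin tested above is sufficient.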

  if (D == D_prev) {
    if (L == 0) {
      *q++ = 0xF0 + (x + 3); // XM!
    } else {
      *q++ = (L << 6) + (x << 3) + 6; // LLxxx110
    }
    *(uint32_t *)q = literal;
    q += L; // non-aligned access OK
  } else if (D < 2048 - 2 * 256) {
    // Short dist D>>8 in 0..5
    *q++ = (D >> 8) + (L << 6) + (x << 3); // LLxxxDDD
    *q++ = D & 0xFF;
    *(uint32_t *)q = literal;
    q += L; // non-aligned access OK
  } else if (D >= (1 << 14) || M == 0 || (x + 3) + M > 34) {
    // Long dist
    *q++ = (L << 6) + (x << 3) + 7;
    *(uint16_t *)q = D;
    q += 2; // non-aligned access OK
    *(uint32_t *)q = literal;
    q += L; // non-aligned access OK
  } else {
    // Medium distance
    x += M;
    M = 0;
    *q++ = 0xA0 + (x >> 2) + (L << 3);
    *(uint16_t *)q = D << 2 | (x & 3);
    q += 2; // non-aligned access OK
    *(uint32_t *)q = literal;
    q += L; // non-aligned access OK
  }

  // Issue remaining match
  while (M > 15) {
    if (q + 2 >= q1)
      goto OUT_FULL;
    x = M < 271 ? M : 271;
    *(uint16_t *)q = 0xf0 + ((x - 16) << 8);
    q += 2; // non-aligned access OK
    M -= x;
  }
  if (M > 0) {
    if (q + 1 >= q1)
      goto OUT_FULL;
    *q++ = 0xF0 + M; // M = 0..15
  }

  return q;

OUT_FULL:
  return q1;
}
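
// For reference, the packet layouts produced by emit() and emit_literal()
// above (a summary read off the code; first byte shown in binary, multi-byte
// fields little-endian, L literal bytes follow where applicable):
//   previous distance  LLMMM110                               (D = previous D)
//   small distance     LLMMMDDD DDDDDDDD                       (D < 1536)
//   medium distance    101LLMMM DDDDDDMM DDDDDDDD              (D < 16384)
//   large distance     LLMMM111 DDDDDDDD DDDDDDDD
//   small literal      1110LLLL                                (L = 1..15)
//   large literal      11100000 LLLLLLLL, second byte is L-16  (L = 16..271)
//   small match        1111MMMM                                (M = 1..15)
//   large match        11110000 MMMMMMMM, second byte is M-16  (M = 16..271)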

// ===============================================================
// Conversions

/*! @abstract Return 32-bit value to store for offset x. */
static inline int32_t offset_to_s32(lzvn_offset x) { return (int32_t)x; }

/*! @abstract Get offset from 32-bit stored value x. */
static inline lzvn_offset offset_from_s32(int32_t x) { return (lzvn_offset)x; }

// ===============================================================
// Hash and Matching

/*! @abstract Get hash in range \c [0,LZVN_ENCODE_HASH_VALUES-1] from 3 bytes in i. */
static inline uint32_t hash3i(uint32_t i) {
  i &= 0xffffff; // truncate to 24-bit input (slightly increases compression ratio)
  uint32_t h = (i * (1 + (1 << 6) + (1 << 12))) >> 12;
  return h & (LZVN_ENCODE_HASH_VALUES - 1);
}
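
// Note on the multiplier above: 1 + 2^6 + 2^12 adds the 24-bit input to copies
// of itself shifted left by 6 and 12 bits, so the bits kept after the >> 12
// and the final mask mix contributions from all three input bytes.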

/*! @abstract Return the number [0, 4] of zero bytes in \p x, starting from the
 * least significant byte. */
static inline lzvn_offset trailing_zero_bytes(uint32_t x) {
  return (x == 0) ? 4 : (__builtin_ctzl(x) >> 3);
}

/*! @abstract Return the number [0, 4] of matching chars between values at
 * \p src+i and \p src+j, starting from the least significant byte.
 * Assumes we can read 4 chars from each position. */
static inline lzvn_offset nmatch4(const unsigned char *src, lzvn_offset i,
                                  lzvn_offset j) {
  uint32_t vi = load4(src + i);
  uint32_t vj = load4(src + j);
  return trailing_zero_bytes(vi ^ vj);
}
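
// Example: if the 4 bytes loaded at src+i and src+j agree in their two
// low-order bytes and differ in the third, vi ^ vj has two trailing zero
// bytes, the trailing-zero-bit count is in [16, 23], and nmatch4 returns 2.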

/*! @abstract Check if l_begin, m_begin, m0_begin (m0_begin < m_begin) can be
 * expanded to a match of length at least 3.
 * @param m_begin new string to match.
 * @param m0_begin candidate old string.
 * @param src source buffer, with valid indices src_begin <= i < src_end.
 * (src_begin may be <0)
 * @return If a match can be found, return 1 and set all \p match fields,
 * otherwise return 0.
 * @note \p *match should be 0 before the call. */
static inline int lzvn_find_match(const unsigned char *src,
                                  lzvn_offset src_begin,
                                  lzvn_offset src_end, lzvn_offset l_begin,
                                  lzvn_offset m0_begin, lzvn_offset m_begin,
                                  lzvn_match_info *match) {
  lzvn_offset n = nmatch4(src, m_begin, m0_begin);
  if (n < 3)
    return 0; // no match

  lzvn_offset D = m_begin - m0_begin; // actual distance
  if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE)
    return 0; // distance out of range

  // Expand forward
  lzvn_offset m_end = m_begin + n;
  while (n == 4 && m_end + 4 < src_end) {
    n = nmatch4(src, m_end, m_end - D);
    m_end += n;
  }

  // Expand backwards over literal
  while (m0_begin > src_begin && m_begin > l_begin &&
         src[m_begin - 1] == src[m0_begin - 1]) {
    m0_begin--;
    m_begin--;
  }

  // OK, we keep it, update MATCH
  lzvn_offset M = m_end - m_begin; // match length
  match->m_begin = m_begin;
  match->m_end = m_end;
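  // K is a cost heuristic used to rank candidate matches: the match length
  // minus the approximate opcode overhead, 2 bytes when the distance fits the
  // short encoding (D < 0x600 = 1536) and 3 bytes otherwise.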
  match->K = M - ((D < 0x600) ? 2 : 3);
  match->M = M;
  match->D = D;

  return 1; // OK
}

/*! @abstract Same as lzvn_find_match, but we already know that N bytes do
 * match (N<=4). */
static inline int lzvn_find_matchN(const unsigned char *src,
                                   lzvn_offset src_begin,
                                   lzvn_offset src_end, lzvn_offset l_begin,
                                   lzvn_offset m0_begin, lzvn_offset m_begin,
                                   lzvn_offset n, lzvn_match_info *match) {
  // We can skip the first comparison on 4 bytes
  if (n < 3)
    return 0; // no match

  lzvn_offset D = m_begin - m0_begin; // actual distance
  if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE)
    return 0; // distance out of range

  // Expand forward
  lzvn_offset m_end = m_begin + n;
  while (n == 4 && m_end + 4 < src_end) {
    n = nmatch4(src, m_end, m_end - D);
    m_end += n;
  }

  // Expand backwards over literal
  while (m0_begin > src_begin && m_begin > l_begin &&
         src[m_begin - 1] == src[m0_begin - 1]) {
    m0_begin--;
    m_begin--;
  }

  // OK, we keep it, update MATCH
  lzvn_offset M = m_end - m_begin; // match length
  match->m_begin = m_begin;
  match->m_end = m_end;
  match->K = M - ((D < 0x600) ? 2 : 3);
  match->M = M;
  match->D = D;

  return 1; // OK
}

// ===============================================================
// Encoder Backend

/*! @abstract Emit a match and update state.
 * @return number of bytes written to \p dst. May be 0 if there is no more space
 * in \p dst to emit the match. */
static inline lzvn_offset lzvn_emit_match(lzvn_encoder_state *state,
                                          lzvn_match_info match) {
  size_t L = (size_t)(match.m_begin - state->src_literal); // literal count
  size_t M = (size_t)match.M; // match length
  size_t D = (size_t)match.D; // match distance
  size_t D_prev = (size_t)state->d_prev; // previously emitted match distance
  unsigned char *dst = emit(state->src + state->src_literal, state->dst,
                            state->dst_end, L, M, D, D_prev);
  // Check if DST is full
  if (dst >= state->dst_end) {
    return 0; // FULL
  }

  // Update state
  lzvn_offset dst_used = dst - state->dst;
  state->d_prev = match.D;
  state->dst = dst;
  state->src_literal = match.m_end;
  return dst_used;
}

/*! @abstract Emit an n-byte literal and update state.
 * @return number of bytes written to \p dst. May be 0 if there is no more space
 * in \p dst to emit the literal. */
static inline lzvn_offset lzvn_emit_literal(lzvn_encoder_state *state,
                                            lzvn_offset n) {
  size_t L = (size_t)n;
  unsigned char *dst = emit_literal(state->src + state->src_literal, state->dst,
                                    state->dst_end, L);
  // Check if DST is full
  if (dst >= state->dst_end)
    return 0; // FULL

  // Update state
  lzvn_offset dst_used = dst - state->dst;
  state->dst = dst;
  state->src_literal += n;
  return dst_used;
}

/*! @abstract Emit end-of-stream and update state.
 * @return number of bytes written to \p dst. May be 0 if there is no more space
 * in \p dst to emit the instruction. */
static inline lzvn_offset lzvn_emit_end_of_stream(lzvn_encoder_state *state) {
  // Do we have 8 bytes in dst?
  if (state->dst_end < state->dst + 8)
    return 0; // FULL

  // Insert end marker and update state
  store8(state->dst, 0x06); // end-of-stream command
  state->dst += 8;
  return 8; // dst_used
}
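
// Note: with the little-endian stores this file already assumes, the store8
// above writes the single end-of-stream opcode byte 0x06 followed by seven
// zero padding bytes, filling the reserved 8-byte end marker.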

// ===============================================================
// Encoder Functions

/*! @abstract Initialize encoder table in \p state, using current I/O parameters. */
static inline void lzvn_init_table(lzvn_encoder_state *state) {
  lzvn_offset index = -LZVN_ENCODE_MAX_DISTANCE; // max match distance
  if (index < state->src_begin)
    index = state->src_begin;
  uint32_t value = load4(state->src + index);

  lzvn_encode_entry_type e;
  for (int i = 0; i < 4; i++) {
    e.indices[i] = offset_to_s32(index);
    e.values[i] = value;
  }
  for (int u = 0; u < LZVN_ENCODE_HASH_VALUES; u++)
    state->table[u] = e; // fill entire table
}
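
// Seeding every table slot with one valid position (and the 4 bytes stored
// there) means candidate indices read back from the table always lie in
// [src_begin, src_end), so the match search never chases an uninitialized
// entry; a seed that never matches is simply rejected by the distance or
// byte-compare checks.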

void lzvn_encode(lzvn_encoder_state *state) {
  const lzvn_match_info NO_MATCH = {0};

  for (; state->src_current < state->src_current_end; state->src_current++) {
    // Get 4 bytes at src_current
    uint32_t vi = load4(state->src + state->src_current);

    // Compute new hash H at position I, and push value into position table
    int h = hash3i(vi); // index of first entry

    // Read table entries for H
    lzvn_encode_entry_type e = state->table[h];

    // Update entry with index=current and value=vi
    lzvn_encode_entry_type updated_e; // rotate values, so we will replace the oldest
    updated_e.indices[0] = offset_to_s32(state->src_current);
    updated_e.indices[1] = e.indices[0];
    updated_e.indices[2] = e.indices[1];
    updated_e.indices[3] = e.indices[2];
    updated_e.values[0] = vi;
    updated_e.values[1] = e.values[0];
    updated_e.values[2] = e.values[1];
    updated_e.values[3] = e.values[2];

    // Do not check matches if still in previously emitted match
    if (state->src_current < state->src_literal)
      goto after_emit;

// Update best with candidate if better
#define UPDATE(best, candidate) \
  do { \
    if (candidate.K > best.K || \
        ((candidate.K == best.K) && (candidate.m_end > best.m_end + 1))) { \
      best = candidate; \
    } \
  } while (0)
// Check candidate. Keep if better.
#define CHECK_CANDIDATE(ik, nk) \
  do { \
    lzvn_match_info m1; \
    if (lzvn_find_matchN(state->src, state->src_begin, state->src_end, \
                         state->src_literal, ik, state->src_current, nk, &m1)) { \
      UPDATE(incoming, m1); \
    } \
  } while (0)
// Emit match M. Return if we don't have enough space in the destination buffer
#define EMIT_MATCH(m) \
  do { \
    if (lzvn_emit_match(state, m) == 0) \
      return; \
  } while (0)
// Emit literal of length L. Return if we don't have enough space in the
// destination buffer
#define EMIT_LITERAL(l) \
  do { \
    if (lzvn_emit_literal(state, l) == 0) \
      return; \
  } while (0)

    lzvn_match_info incoming = NO_MATCH;

    // Check candidates in order (closest first)
    uint32_t diffs[4];
    for (int k = 0; k < 4; k++)
      diffs[k] = e.values[k] ^ vi; // XOR, 0 if equal
    lzvn_offset ik; // index
    lzvn_offset nk; // match byte count

    // The values stored in e.indices are 32-bit signed indices, extended to
    // the signed type lzvn_offset
    ik = offset_from_s32(e.indices[0]);
    nk = trailing_zero_bytes(diffs[0]);
    CHECK_CANDIDATE(ik, nk);
    ik = offset_from_s32(e.indices[1]);
    nk = trailing_zero_bytes(diffs[1]);
    CHECK_CANDIDATE(ik, nk);
    ik = offset_from_s32(e.indices[2]);
    nk = trailing_zero_bytes(diffs[2]);
    CHECK_CANDIDATE(ik, nk);
    ik = offset_from_s32(e.indices[3]);
    nk = trailing_zero_bytes(diffs[3]);
    CHECK_CANDIDATE(ik, nk);

    // Check candidate at previous distance
    if (state->d_prev != 0) {
      lzvn_match_info m1;
      if (lzvn_find_match(state->src, state->src_begin, state->src_end,
                          state->src_literal, state->src_current - state->d_prev,
                          state->src_current, &m1)) {
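        // A match that reuses the previous distance needs only a one-byte
        // opcode (no distance bytes), so its cost estimate is raised to M - 1
        // before comparing it with the other candidates.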
        m1.K = m1.M - 1; // fix K for D_prev
        UPDATE(incoming, m1);
      }
    }

    // Here we have the best candidate in incoming, may be NO_MATCH

    // If no incoming match, and literal backlog becomes too high, emit pending
    // match, or literals if there is no pending match
    if (incoming.M == 0) {
      if (state->src_current - state->src_literal >=
          LZVN_ENCODE_MAX_LITERAL_BACKLOG) // at this point, we always have
                                           // current >= literal
      {
        if (state->pending.M != 0) {
          EMIT_MATCH(state->pending);
          state->pending = NO_MATCH;
        } else {
          // emit long literal (271 is the longest literal size we allow)
          EMIT_LITERAL(271);
        }
      }
      goto after_emit;
    }

    if (state->pending.M == 0) {
      // NOTE. Here, we can also emit incoming right away. It will make the
      // encoder 1.5x faster, at a cost of ~10% lower compression ratio:
      // EMIT_MATCH(incoming);
      // state->pending = NO_MATCH;

      // No pending match, emit nothing, keep incoming
      state->pending = incoming;
    } else {
      // Here we have both incoming and pending
      if (state->pending.m_end <= incoming.m_begin) {
        // No overlap: emit pending, keep incoming
        EMIT_MATCH(state->pending);
        state->pending = incoming;
      } else {
        // If pending is better, emit pending and discard incoming.
        // Otherwise, emit incoming and discard pending.
        if (incoming.K > state->pending.K)
          state->pending = incoming;
        EMIT_MATCH(state->pending);
        state->pending = NO_MATCH;
      }
    }

  after_emit:

    // We commit state changes only after we tried to emit instructions, so we
    // can restart in the same state in case dst was full and we quit the loop.
    state->table[h] = updated_e;

  } // i loop

  // Do not emit pending match here. We do it only at the end of stream.
}

// ===============================================================
// API entry points

size_t lzvn_encode_scratch_size(void) { return LZVN_ENCODE_WORK_SIZE; }

static size_t lzvn_encode_partial(void *__restrict dst, size_t dst_size,
                                  const void *__restrict src, size_t src_size,
                                  size_t *src_used, void *__restrict work) {
  // Min size checks to avoid accessing memory outside buffers.
  if (dst_size < LZVN_ENCODE_MIN_DST_SIZE) {
    *src_used = 0;
    return 0;
  }
  // Max input size check (limit to offsets on uint32_t).
  if (src_size > LZVN_ENCODE_MAX_SRC_SIZE) {
    src_size = LZVN_ENCODE_MAX_SRC_SIZE;
  }

  // Setup encoder state
  lzvn_encoder_state state;
  memset(&state, 0, sizeof(state));

  state.src = src;
  state.src_begin = 0;
  state.src_end = (lzvn_offset)src_size;
  state.src_literal = 0;
  state.src_current = 0;
  state.dst = dst;
  state.dst_begin = dst;
  state.dst_end = (unsigned char *)dst + dst_size - 8; // reserve 8 bytes for end-of-stream
  state.table = work;

  // Do not encode if the input buffer is too small. We'll emit a literal instead.
  if (src_size >= LZVN_ENCODE_MIN_SRC_SIZE) {

    state.src_current_end = (lzvn_offset)src_size - LZVN_ENCODE_MIN_MARGIN;
    lzvn_init_table(&state);
    lzvn_encode(&state);

  }

  // No need to test the return value: src_literal will not be updated on failure,
  // and we will fail later.
  lzvn_emit_literal(&state, state.src_end - state.src_literal);

  // Restore original size, so end-of-stream always succeeds, and emit it
  state.dst_end = (unsigned char *)dst + dst_size;
  lzvn_emit_end_of_stream(&state);

  *src_used = state.src_literal;
  return (size_t)(state.dst - state.dst_begin);
}

size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size,
                          const void *__restrict src, size_t src_size,
                          void *__restrict work) {
  size_t src_used = 0;
  size_t dst_used =
      lzvn_encode_partial(dst, dst_size, src, src_size, &src_used, work);
  if (src_used != src_size)
    return 0; // could not encode entire input stream = fail
  return dst_used; // return encoded size
}
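
// Example use (a sketch; the buffer names and sizes below are illustrative and
// not part of this file):
//   void *work = malloc(lzvn_encode_scratch_size());
//   uint8_t out[4096];
//   size_t n = lzvn_encode_buffer(out, sizeof(out), in, in_size, work);
//   if (n == 0) {
//     // compressed output did not fit in out: store the block uncompressed
//   }
//   free(work);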