1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * LZO1X Compressor from LZO |
4 | * |
5 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
6 | * |
7 | * The full LZO package can be found at: |
8 | * http://www.oberhumer.com/opensource/lzo/ |
9 | * |
10 | * Changed for Linux kernel use by: |
11 | * Nitin Gupta <nitingupta910@gmail.com> |
12 | * Richard Purdie <rpurdie@openedhand.com> |
13 | */ |
14 | |
15 | #include <linux/module.h> |
16 | #include <linux/kernel.h> |
17 | #include <asm/unaligned.h> |
18 | #include <linux/lzo.h> |
19 | #include "lzodefs.h" |
20 | |
21 | static noinline size_t |
22 | lzo1x_1_do_compress(const unsigned char *in, size_t in_len, |
23 | unsigned char *out, size_t *out_len, |
24 | size_t ti, void *wrkmem, signed char *state_offset, |
25 | const unsigned char bitstream_version) |
26 | { |
27 | const unsigned char *ip; |
28 | unsigned char *op; |
29 | const unsigned char * const in_end = in + in_len; |
30 | const unsigned char * const ip_end = in + in_len - 20; |
31 | const unsigned char *ii; |
32 | lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; |
33 | |
34 | op = out; |
35 | ip = in; |
36 | ii = ip; |
37 | ip += ti < 4 ? 4 - ti : 0; |
38 | |
39 | for (;;) { |
40 | const unsigned char *m_pos = NULL; |
41 | size_t t, m_len, m_off; |
42 | u32 dv; |
43 | u32 run_length = 0; |
44 | literal: |
45 | ip += 1 + ((ip - ii) >> 5); |
46 | next: |
47 | if (unlikely(ip >= ip_end)) |
48 | break; |
49 | dv = get_unaligned_le32(p: ip); |
50 | |
51 | if (dv == 0 && bitstream_version) { |
52 | const unsigned char *ir = ip + 4; |
53 | const unsigned char *limit = min(ip_end, ip + MAX_ZERO_RUN_LENGTH + 1); |
54 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ |
55 | defined(LZO_FAST_64BIT_MEMORY_ACCESS) |
56 | u64 dv64; |
57 | |
58 | for (; (ir + 32) <= limit; ir += 32) { |
59 | dv64 = get_unaligned((u64 *)ir); |
60 | dv64 |= get_unaligned((u64 *)ir + 1); |
61 | dv64 |= get_unaligned((u64 *)ir + 2); |
62 | dv64 |= get_unaligned((u64 *)ir + 3); |
63 | if (dv64) |
64 | break; |
65 | } |
66 | for (; (ir + 8) <= limit; ir += 8) { |
67 | dv64 = get_unaligned((u64 *)ir); |
68 | if (dv64) { |
69 | # if defined(__LITTLE_ENDIAN) |
70 | ir += __builtin_ctzll(dv64) >> 3; |
71 | # elif defined(__BIG_ENDIAN) |
72 | ir += __builtin_clzll(dv64) >> 3; |
73 | # else |
74 | # error "missing endian definition" |
75 | # endif |
76 | break; |
77 | } |
78 | } |
79 | #else |
80 | while ((ir < (const unsigned char *) |
81 | ALIGN((uintptr_t)ir, 4)) && |
82 | (ir < limit) && (*ir == 0)) |
83 | ir++; |
84 | if (IS_ALIGNED((uintptr_t)ir, 4)) { |
85 | for (; (ir + 4) <= limit; ir += 4) { |
86 | dv = *((u32 *)ir); |
87 | if (dv) { |
88 | # if defined(__LITTLE_ENDIAN) |
89 | ir += __builtin_ctz(dv) >> 3; |
90 | # elif defined(__BIG_ENDIAN) |
91 | ir += __builtin_clz(dv) >> 3; |
92 | # else |
93 | # error "missing endian definition" |
94 | # endif |
95 | break; |
96 | } |
97 | } |
98 | } |
99 | #endif |
100 | while (likely(ir < limit) && unlikely(*ir == 0)) |
101 | ir++; |
102 | run_length = ir - ip; |
103 | if (run_length > MAX_ZERO_RUN_LENGTH) |
104 | run_length = MAX_ZERO_RUN_LENGTH; |
105 | } else { |
106 | t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; |
107 | m_pos = in + dict[t]; |
108 | dict[t] = (lzo_dict_t) (ip - in); |
109 | if (unlikely(dv != get_unaligned_le32(m_pos))) |
110 | goto literal; |
111 | } |
112 | |
113 | ii -= ti; |
114 | ti = 0; |
115 | t = ip - ii; |
116 | if (t != 0) { |
117 | if (t <= 3) { |
118 | op[*state_offset] |= t; |
119 | COPY4(op, ii); |
120 | op += t; |
121 | } else if (t <= 16) { |
122 | *op++ = (t - 3); |
123 | COPY8(op, ii); |
124 | COPY8(op + 8, ii + 8); |
125 | op += t; |
126 | } else { |
127 | if (t <= 18) { |
128 | *op++ = (t - 3); |
129 | } else { |
130 | size_t tt = t - 18; |
131 | *op++ = 0; |
132 | while (unlikely(tt > 255)) { |
133 | tt -= 255; |
134 | *op++ = 0; |
135 | } |
136 | *op++ = tt; |
137 | } |
138 | do { |
139 | COPY8(op, ii); |
140 | COPY8(op + 8, ii + 8); |
141 | op += 16; |
142 | ii += 16; |
143 | t -= 16; |
144 | } while (t >= 16); |
145 | if (t > 0) do { |
146 | *op++ = *ii++; |
147 | } while (--t > 0); |
148 | } |
149 | } |
150 | |
151 | if (unlikely(run_length)) { |
152 | ip += run_length; |
153 | run_length -= MIN_ZERO_RUN_LENGTH; |
154 | put_unaligned_le32(val: (run_length << 21) | 0xfffc18 |
155 | | (run_length & 0x7), p: op); |
156 | op += 4; |
157 | run_length = 0; |
158 | *state_offset = -3; |
159 | goto finished_writing_instruction; |
160 | } |
161 | |
162 | m_len = 4; |
163 | { |
164 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) |
165 | u64 v; |
166 | v = get_unaligned((const u64 *) (ip + m_len)) ^ |
167 | get_unaligned((const u64 *) (m_pos + m_len)); |
168 | if (unlikely(v == 0)) { |
169 | do { |
170 | m_len += 8; |
171 | v = get_unaligned((const u64 *) (ip + m_len)) ^ |
172 | get_unaligned((const u64 *) (m_pos + m_len)); |
173 | if (unlikely(ip + m_len >= ip_end)) |
174 | goto m_len_done; |
175 | } while (v == 0); |
176 | } |
177 | # if defined(__LITTLE_ENDIAN) |
178 | m_len += (unsigned) __builtin_ctzll(v) / 8; |
179 | # elif defined(__BIG_ENDIAN) |
180 | m_len += (unsigned) __builtin_clzll(v) / 8; |
181 | # else |
182 | # error "missing endian definition" |
183 | # endif |
184 | #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) |
185 | u32 v; |
186 | v = get_unaligned((const u32 *) (ip + m_len)) ^ |
187 | get_unaligned((const u32 *) (m_pos + m_len)); |
188 | if (unlikely(v == 0)) { |
189 | do { |
190 | m_len += 4; |
191 | v = get_unaligned((const u32 *) (ip + m_len)) ^ |
192 | get_unaligned((const u32 *) (m_pos + m_len)); |
193 | if (v != 0) |
194 | break; |
195 | m_len += 4; |
196 | v = get_unaligned((const u32 *) (ip + m_len)) ^ |
197 | get_unaligned((const u32 *) (m_pos + m_len)); |
198 | if (unlikely(ip + m_len >= ip_end)) |
199 | goto m_len_done; |
200 | } while (v == 0); |
201 | } |
202 | # if defined(__LITTLE_ENDIAN) |
203 | m_len += (unsigned) __builtin_ctz(v) / 8; |
204 | # elif defined(__BIG_ENDIAN) |
205 | m_len += (unsigned) __builtin_clz(v) / 8; |
206 | # else |
207 | # error "missing endian definition" |
208 | # endif |
209 | #else |
210 | if (unlikely(ip[m_len] == m_pos[m_len])) { |
211 | do { |
212 | m_len += 1; |
213 | if (ip[m_len] != m_pos[m_len]) |
214 | break; |
215 | m_len += 1; |
216 | if (ip[m_len] != m_pos[m_len]) |
217 | break; |
218 | m_len += 1; |
219 | if (ip[m_len] != m_pos[m_len]) |
220 | break; |
221 | m_len += 1; |
222 | if (ip[m_len] != m_pos[m_len]) |
223 | break; |
224 | m_len += 1; |
225 | if (ip[m_len] != m_pos[m_len]) |
226 | break; |
227 | m_len += 1; |
228 | if (ip[m_len] != m_pos[m_len]) |
229 | break; |
230 | m_len += 1; |
231 | if (ip[m_len] != m_pos[m_len]) |
232 | break; |
233 | m_len += 1; |
234 | if (unlikely(ip + m_len >= ip_end)) |
235 | goto m_len_done; |
236 | } while (ip[m_len] == m_pos[m_len]); |
237 | } |
238 | #endif |
239 | } |
240 | m_len_done: |
241 | |
242 | m_off = ip - m_pos; |
243 | ip += m_len; |
244 | if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { |
245 | m_off -= 1; |
246 | *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); |
247 | *op++ = (m_off >> 3); |
248 | } else if (m_off <= M3_MAX_OFFSET) { |
249 | m_off -= 1; |
250 | if (m_len <= M3_MAX_LEN) |
251 | *op++ = (M3_MARKER | (m_len - 2)); |
252 | else { |
253 | m_len -= M3_MAX_LEN; |
254 | *op++ = M3_MARKER | 0; |
255 | while (unlikely(m_len > 255)) { |
256 | m_len -= 255; |
257 | *op++ = 0; |
258 | } |
259 | *op++ = (m_len); |
260 | } |
261 | *op++ = (m_off << 2); |
262 | *op++ = (m_off >> 6); |
263 | } else { |
264 | m_off -= 0x4000; |
265 | if (m_len <= M4_MAX_LEN) |
266 | *op++ = (M4_MARKER | ((m_off >> 11) & 8) |
267 | | (m_len - 2)); |
268 | else { |
269 | if (unlikely(((m_off & 0x403f) == 0x403f) |
270 | && (m_len >= 261) |
271 | && (m_len <= 264)) |
272 | && likely(bitstream_version)) { |
273 | // Under lzo-rle, block copies |
274 | // for 261 <= length <= 264 and |
275 | // (distance & 0x80f3) == 0x80f3 |
276 | // can result in ambiguous |
277 | // output. Adjust length |
278 | // to 260 to prevent ambiguity. |
279 | ip -= m_len - 260; |
280 | m_len = 260; |
281 | } |
282 | m_len -= M4_MAX_LEN; |
283 | *op++ = (M4_MARKER | ((m_off >> 11) & 8)); |
284 | while (unlikely(m_len > 255)) { |
285 | m_len -= 255; |
286 | *op++ = 0; |
287 | } |
288 | *op++ = (m_len); |
289 | } |
290 | *op++ = (m_off << 2); |
291 | *op++ = (m_off >> 6); |
292 | } |
293 | *state_offset = -2; |
294 | finished_writing_instruction: |
295 | ii = ip; |
296 | goto next; |
297 | } |
298 | *out_len = op - out; |
299 | return in_end - (ii - ti); |
300 | } |
301 | |
302 | static int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, |
303 | unsigned char *out, size_t *out_len, |
304 | void *wrkmem, const unsigned char bitstream_version) |
305 | { |
306 | const unsigned char *ip = in; |
307 | unsigned char *op = out; |
308 | unsigned char *data_start; |
309 | size_t l = in_len; |
310 | size_t t = 0; |
311 | signed char state_offset = -2; |
312 | unsigned int m4_max_offset; |
313 | |
314 | // LZO v0 will never write 17 as first byte (except for zero-length |
315 | // input), so this is used to version the bitstream |
316 | if (bitstream_version > 0) { |
317 | *op++ = 17; |
318 | *op++ = bitstream_version; |
319 | m4_max_offset = M4_MAX_OFFSET_V1; |
320 | } else { |
321 | m4_max_offset = M4_MAX_OFFSET_V0; |
322 | } |
323 | |
324 | data_start = op; |
325 | |
326 | while (l > 20) { |
327 | size_t ll = min_t(size_t, l, m4_max_offset + 1); |
328 | uintptr_t ll_end = (uintptr_t) ip + ll; |
329 | if ((ll_end + ((t + ll) >> 5)) <= ll_end) |
330 | break; |
331 | BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); |
332 | memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); |
333 | t = lzo1x_1_do_compress(in: ip, in_len: ll, out: op, out_len, ti: t, wrkmem, |
334 | state_offset: &state_offset, bitstream_version); |
335 | ip += ll; |
336 | op += *out_len; |
337 | l -= ll; |
338 | } |
339 | t += l; |
340 | |
341 | if (t > 0) { |
342 | const unsigned char *ii = in + in_len - t; |
343 | |
344 | if (op == data_start && t <= 238) { |
345 | *op++ = (17 + t); |
346 | } else if (t <= 3) { |
347 | op[state_offset] |= t; |
348 | } else if (t <= 18) { |
349 | *op++ = (t - 3); |
350 | } else { |
351 | size_t tt = t - 18; |
352 | *op++ = 0; |
353 | while (tt > 255) { |
354 | tt -= 255; |
355 | *op++ = 0; |
356 | } |
357 | *op++ = tt; |
358 | } |
359 | if (t >= 16) do { |
360 | COPY8(op, ii); |
361 | COPY8(op + 8, ii + 8); |
362 | op += 16; |
363 | ii += 16; |
364 | t -= 16; |
365 | } while (t >= 16); |
366 | if (t > 0) do { |
367 | *op++ = *ii++; |
368 | } while (--t > 0); |
369 | } |
370 | |
371 | *op++ = M4_MARKER | 1; |
372 | *op++ = 0; |
373 | *op++ = 0; |
374 | |
375 | *out_len = op - out; |
376 | return LZO_E_OK; |
377 | } |
378 | |
379 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, |
380 | unsigned char *out, size_t *out_len, |
381 | void *wrkmem) |
382 | { |
383 | return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, bitstream_version: 0); |
384 | } |
385 | |
386 | int lzorle1x_1_compress(const unsigned char *in, size_t in_len, |
387 | unsigned char *out, size_t *out_len, |
388 | void *wrkmem) |
389 | { |
390 | return lzogeneric1x_1_compress(in, in_len, out, out_len, |
391 | wrkmem, LZO_VERSION); |
392 | } |
393 | |
394 | EXPORT_SYMBOL_GPL(lzo1x_1_compress); |
395 | EXPORT_SYMBOL_GPL(lzorle1x_1_compress); |
396 | |
397 | MODULE_LICENSE("GPL" ); |
398 | MODULE_DESCRIPTION("LZO1X-1 Compressor" ); |
399 | |