1 | /* |
2 | LZ4 HC Encoder - Part of LZ4 HC algorithm |
3 | Copyright (C) 2011-2013, Yann Collet. |
4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
5 | |
6 | Redistribution and use in source and binary forms, with or without |
7 | modification, are permitted provided that the following conditions are |
8 | met: |
9 | |
10 | * Redistributions of source code must retain the above copyright |
11 | notice, this list of conditions and the following disclaimer. |
12 | * Redistributions in binary form must reproduce the above |
13 | copyright notice, this list of conditions and the following disclaimer |
14 | in the documentation and/or other materials provided with the |
15 | distribution. |
16 | |
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | |
29 | You can contact the author at : |
30 | - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html |
31 | - LZ4 source repository : http://code.google.com/p/lz4/ |
32 | */ |
33 | |
34 | /* lz4hc_encoder.h must be included into lz4hc.c |
35 | The objective of this file is to create a single LZ4 compression function source |
36 | which will be instanciated multiple times with minor variations |
37 | depending on a set of #define. |
38 | */ |
39 | |
40 | //**************************** |
41 | // Check required defines |
42 | //**************************** |
43 | |
44 | #ifndef FUNCTION_NAME |
45 | # error "FUNTION_NAME is not defined" |
46 | #endif |
47 | |
48 | |
49 | //**************************** |
50 | // Local definitions |
51 | //**************************** |
52 | #define COMBINED_NAME_RAW(n1,n2) n1 ## n2 |
53 | #define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2) |
54 | #define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence) |
55 | #ifdef LIMITED_OUTPUT |
56 | # define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0; |
57 | #else |
58 | # define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r) |
59 | #endif |
60 | |
61 | //**************************** |
62 | // Function code |
63 | //**************************** |
64 | |
65 | forceinline static int ENCODE_SEQUENCE_NAME ( |
66 | const BYTE** ip, |
67 | BYTE** op, |
68 | const BYTE** anchor, |
69 | int matchLength, |
70 | const BYTE* ref |
71 | #ifdef LIMITED_OUTPUT |
72 | ,BYTE* oend |
73 | #endif |
74 | ) |
75 | { |
76 | int length, len; |
77 | BYTE* token; |
78 | |
79 | // Encode Literal length |
80 | length = (int)(*ip - *anchor); |
81 | token = (*op)++; |
82 | #ifdef LIMITED_OUTPUT |
83 | if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit |
84 | #endif |
85 | if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } |
86 | else *token = (BYTE)(length<<ML_BITS); |
87 | |
88 | // Copy Literals |
89 | LZ4_BLINDCOPY(*anchor, *op, length); |
90 | |
91 | // Encode Offset |
92 | LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref)); |
93 | |
94 | // Encode MatchLength |
95 | length = (int)(matchLength-MINMATCH); |
96 | #ifdef LIMITED_OUTPUT |
97 | if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit |
98 | #endif |
99 | if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } |
100 | else *token += (BYTE)(length); |
101 | |
102 | // Prepare next loop |
103 | *ip += matchLength; |
104 | *anchor = *ip; |
105 | |
106 | return 0; |
107 | } |
108 | |
109 | |
110 | int COMBINED_NAME(FUNCTION_NAME,_continue) ( |
111 | void* ctxvoid, |
112 | const char* source, |
113 | char* dest, |
114 | int inputSize |
115 | #ifdef LIMITED_OUTPUT |
116 | ,int maxOutputSize |
117 | #endif |
118 | ) |
119 | { |
120 | LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid; |
121 | const BYTE* ip = (const BYTE*) source; |
122 | const BYTE* anchor = ip; |
123 | const BYTE* const iend = ip + inputSize; |
124 | const BYTE* const mflimit = iend - MFLIMIT; |
125 | const BYTE* const matchlimit = (iend - LASTLITERALS); |
126 | |
127 | BYTE* op = (BYTE*) dest; |
128 | #ifdef LIMITED_OUTPUT |
129 | BYTE* const oend = op + maxOutputSize; |
130 | #endif |
131 | |
132 | int ml, ml2, ml3, ml0; |
133 | const BYTE* ref=NULL; |
134 | const BYTE* start2=NULL; |
135 | const BYTE* ref2=NULL; |
136 | const BYTE* start3=NULL; |
137 | const BYTE* ref3=NULL; |
138 | const BYTE* start0; |
139 | const BYTE* ref0; |
140 | |
141 | // Ensure blocks follow each other |
142 | if (ip != ctx->end) return 0; |
143 | ctx->end += inputSize; |
144 | |
145 | ip++; |
146 | |
147 | // Main Loop |
148 | while (ip < mflimit) |
149 | { |
150 | ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); |
151 | if (!ml) { ip++; continue; } |
152 | |
153 | // saved, in case we would skip too much |
154 | start0 = ip; |
155 | ref0 = ref; |
156 | ml0 = ml; |
157 | |
158 | _Search2: |
159 | if (ip+ml < mflimit) |
160 | ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2); |
161 | else ml2 = ml; |
162 | |
163 | if (ml2 == ml) // No better match |
164 | { |
165 | ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); |
166 | continue; |
167 | } |
168 | |
169 | if (start0 < ip) |
170 | { |
171 | if (start2 < ip + ml0) // empirical |
172 | { |
173 | ip = start0; |
174 | ref = ref0; |
175 | ml = ml0; |
176 | } |
177 | } |
178 | |
179 | // Here, start0==ip |
180 | if ((start2 - ip) < 3) // First Match too small : removed |
181 | { |
182 | ml = ml2; |
183 | ip = start2; |
184 | ref =ref2; |
185 | goto _Search2; |
186 | } |
187 | |
188 | _Search3: |
189 | // Currently we have : |
190 | // ml2 > ml1, and |
191 | // ip1+3 <= ip2 (usually < ip1+ml1) |
192 | if ((start2 - ip) < OPTIMAL_ML) |
193 | { |
194 | int correction; |
195 | int new_ml = ml; |
196 | if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; |
197 | if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; |
198 | correction = new_ml - (int)(start2 - ip); |
199 | if (correction > 0) |
200 | { |
201 | start2 += correction; |
202 | ref2 += correction; |
203 | ml2 -= correction; |
204 | } |
205 | } |
206 | // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) |
207 | |
208 | if (start2 + ml2 < mflimit) |
209 | ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3); |
210 | else ml3 = ml2; |
211 | |
212 | if (ml3 == ml2) // No better match : 2 sequences to encode |
213 | { |
214 | // ip & ref are known; Now for ml |
215 | if (start2 < ip+ml) ml = (int)(start2 - ip); |
216 | // Now, encode 2 sequences |
217 | ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); |
218 | ip = start2; |
219 | ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend); |
220 | continue; |
221 | } |
222 | |
223 | if (start3 < ip+ml+3) // Not enough space for match 2 : remove it |
224 | { |
225 | if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 |
226 | { |
227 | if (start2 < ip+ml) |
228 | { |
229 | int correction = (int)(ip+ml - start2); |
230 | start2 += correction; |
231 | ref2 += correction; |
232 | ml2 -= correction; |
233 | if (ml2 < MINMATCH) |
234 | { |
235 | start2 = start3; |
236 | ref2 = ref3; |
237 | ml2 = ml3; |
238 | } |
239 | } |
240 | |
241 | ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); |
242 | ip = start3; |
243 | ref = ref3; |
244 | ml = ml3; |
245 | |
246 | start0 = start2; |
247 | ref0 = ref2; |
248 | ml0 = ml2; |
249 | goto _Search2; |
250 | } |
251 | |
252 | start2 = start3; |
253 | ref2 = ref3; |
254 | ml2 = ml3; |
255 | goto _Search3; |
256 | } |
257 | |
258 | // OK, now we have 3 ascending matches; let's write at least the first one |
259 | // ip & ref are known; Now for ml |
260 | if (start2 < ip+ml) |
261 | { |
262 | if ((start2 - ip) < (int)ML_MASK) |
263 | { |
264 | int correction; |
265 | if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; |
266 | if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; |
267 | correction = ml - (int)(start2 - ip); |
268 | if (correction > 0) |
269 | { |
270 | start2 += correction; |
271 | ref2 += correction; |
272 | ml2 -= correction; |
273 | } |
274 | } |
275 | else |
276 | { |
277 | ml = (int)(start2 - ip); |
278 | } |
279 | } |
280 | ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend); |
281 | |
282 | ip = start2; |
283 | ref = ref2; |
284 | ml = ml2; |
285 | |
286 | start2 = start3; |
287 | ref2 = ref3; |
288 | ml2 = ml3; |
289 | |
290 | goto _Search3; |
291 | |
292 | } |
293 | |
294 | // Encode Last Literals |
295 | { |
296 | int lastRun = (int)(iend - anchor); |
297 | #ifdef LIMITED_OUTPUT |
298 | if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit |
299 | #endif |
300 | if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } |
301 | else *op++ = (BYTE)(lastRun<<ML_BITS); |
302 | memcpy(op, anchor, iend - anchor); |
303 | op += iend-anchor; |
304 | } |
305 | |
306 | // End |
307 | return (int) (((char*)op)-dest); |
308 | } |
309 | |
310 | |
311 | int FUNCTION_NAME (const char* source, |
312 | char* dest, |
313 | int inputSize |
314 | #ifdef LIMITED_OUTPUT |
315 | ,int maxOutputSize |
316 | #endif |
317 | ) |
318 | { |
319 | void* ctx = LZ4_createHC(source); |
320 | int result; |
321 | if (ctx==NULL) return 0; |
322 | #ifdef LIMITED_OUTPUT |
323 | result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize, maxOutputSize); |
324 | #else |
325 | result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize); |
326 | #endif |
327 | LZ4_freeHC(ctx); |
328 | |
329 | return result; |
330 | } |
331 | |
332 | |
333 | //**************************** |
334 | // Clean defines |
335 | //**************************** |
336 | |
337 | // Required defines |
338 | #undef FUNCTION_NAME |
339 | |
340 | // Locally Generated |
341 | #undef ENCODE_SEQUENCE |
342 | #undef ENCODE_SEQUENCE_NAME |
343 | |
344 | // Optional defines |
345 | #ifdef LIMITED_OUTPUT |
346 | #undef LIMITED_OUTPUT |
347 | #endif |
348 | |
349 | |
350 | |