1/*
2 LZ4 HC Encoder - Part of LZ4 HC algorithm
3 Copyright (C) 2011-2013, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
8 met:
9
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
15 distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 You can contact the author at :
30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 - LZ4 source repository : http://code.google.com/p/lz4/
32*/
33
34/* lz4hc_encoder.h must be included into lz4hc.c
35 The objective of this file is to create a single LZ4 compression function source
36 which will be instanciated multiple times with minor variations
37 depending on a set of #define.
38*/
39
40//****************************
41// Check required defines
42//****************************
43
44#ifndef FUNCTION_NAME
45# error "FUNTION_NAME is not defined"
46#endif
47
48
49//****************************
50// Local definitions
51//****************************
52#define COMBINED_NAME_RAW(n1,n2) n1 ## n2
53#define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2)
54#define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence)
55#ifdef LIMITED_OUTPUT
56# define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0;
57#else
58# define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r)
59#endif
60
61//****************************
62// Function code
63//****************************
64
65forceinline static int ENCODE_SEQUENCE_NAME (
66 const BYTE** ip,
67 BYTE** op,
68 const BYTE** anchor,
69 int matchLength,
70 const BYTE* ref
71#ifdef LIMITED_OUTPUT
72 ,BYTE* oend
73#endif
74 )
75{
76 int length, len;
77 BYTE* token;
78
79 // Encode Literal length
80 length = (int)(*ip - *anchor);
81 token = (*op)++;
82#ifdef LIMITED_OUTPUT
83 if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit
84#endif
85 if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
86 else *token = (BYTE)(length<<ML_BITS);
87
88 // Copy Literals
89 LZ4_BLINDCOPY(*anchor, *op, length);
90
91 // Encode Offset
92 LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
93
94 // Encode MatchLength
95 length = (int)(matchLength-MINMATCH);
96#ifdef LIMITED_OUTPUT
97 if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit
98#endif
99 if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
100 else *token += (BYTE)(length);
101
102 // Prepare next loop
103 *ip += matchLength;
104 *anchor = *ip;
105
106 return 0;
107}
108
109
110int COMBINED_NAME(FUNCTION_NAME,_continue) (
111 void* ctxvoid,
112 const char* source,
113 char* dest,
114 int inputSize
115#ifdef LIMITED_OUTPUT
116 ,int maxOutputSize
117#endif
118 )
119{
120 LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
121 const BYTE* ip = (const BYTE*) source;
122 const BYTE* anchor = ip;
123 const BYTE* const iend = ip + inputSize;
124 const BYTE* const mflimit = iend - MFLIMIT;
125 const BYTE* const matchlimit = (iend - LASTLITERALS);
126
127 BYTE* op = (BYTE*) dest;
128#ifdef LIMITED_OUTPUT
129 BYTE* const oend = op + maxOutputSize;
130#endif
131
132 int ml, ml2, ml3, ml0;
133 const BYTE* ref=NULL;
134 const BYTE* start2=NULL;
135 const BYTE* ref2=NULL;
136 const BYTE* start3=NULL;
137 const BYTE* ref3=NULL;
138 const BYTE* start0;
139 const BYTE* ref0;
140
141 // Ensure blocks follow each other
142 if (ip != ctx->end) return 0;
143 ctx->end += inputSize;
144
145 ip++;
146
147 // Main Loop
148 while (ip < mflimit)
149 {
150 ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
151 if (!ml) { ip++; continue; }
152
153 // saved, in case we would skip too much
154 start0 = ip;
155 ref0 = ref;
156 ml0 = ml;
157
158_Search2:
159 if (ip+ml < mflimit)
160 ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
161 else ml2 = ml;
162
163 if (ml2 == ml) // No better match
164 {
165 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
166 continue;
167 }
168
169 if (start0 < ip)
170 {
171 if (start2 < ip + ml0) // empirical
172 {
173 ip = start0;
174 ref = ref0;
175 ml = ml0;
176 }
177 }
178
179 // Here, start0==ip
180 if ((start2 - ip) < 3) // First Match too small : removed
181 {
182 ml = ml2;
183 ip = start2;
184 ref =ref2;
185 goto _Search2;
186 }
187
188_Search3:
189 // Currently we have :
190 // ml2 > ml1, and
191 // ip1+3 <= ip2 (usually < ip1+ml1)
192 if ((start2 - ip) < OPTIMAL_ML)
193 {
194 int correction;
195 int new_ml = ml;
196 if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
197 if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
198 correction = new_ml - (int)(start2 - ip);
199 if (correction > 0)
200 {
201 start2 += correction;
202 ref2 += correction;
203 ml2 -= correction;
204 }
205 }
206 // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
207
208 if (start2 + ml2 < mflimit)
209 ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
210 else ml3 = ml2;
211
212 if (ml3 == ml2) // No better match : 2 sequences to encode
213 {
214 // ip & ref are known; Now for ml
215 if (start2 < ip+ml) ml = (int)(start2 - ip);
216 // Now, encode 2 sequences
217 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
218 ip = start2;
219 ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend);
220 continue;
221 }
222
223 if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
224 {
225 if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
226 {
227 if (start2 < ip+ml)
228 {
229 int correction = (int)(ip+ml - start2);
230 start2 += correction;
231 ref2 += correction;
232 ml2 -= correction;
233 if (ml2 < MINMATCH)
234 {
235 start2 = start3;
236 ref2 = ref3;
237 ml2 = ml3;
238 }
239 }
240
241 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
242 ip = start3;
243 ref = ref3;
244 ml = ml3;
245
246 start0 = start2;
247 ref0 = ref2;
248 ml0 = ml2;
249 goto _Search2;
250 }
251
252 start2 = start3;
253 ref2 = ref3;
254 ml2 = ml3;
255 goto _Search3;
256 }
257
258 // OK, now we have 3 ascending matches; let's write at least the first one
259 // ip & ref are known; Now for ml
260 if (start2 < ip+ml)
261 {
262 if ((start2 - ip) < (int)ML_MASK)
263 {
264 int correction;
265 if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
266 if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
267 correction = ml - (int)(start2 - ip);
268 if (correction > 0)
269 {
270 start2 += correction;
271 ref2 += correction;
272 ml2 -= correction;
273 }
274 }
275 else
276 {
277 ml = (int)(start2 - ip);
278 }
279 }
280 ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
281
282 ip = start2;
283 ref = ref2;
284 ml = ml2;
285
286 start2 = start3;
287 ref2 = ref3;
288 ml2 = ml3;
289
290 goto _Search3;
291
292 }
293
294 // Encode Last Literals
295 {
296 int lastRun = (int)(iend - anchor);
297#ifdef LIMITED_OUTPUT
298 if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
299#endif
300 if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
301 else *op++ = (BYTE)(lastRun<<ML_BITS);
302 memcpy(op, anchor, iend - anchor);
303 op += iend-anchor;
304 }
305
306 // End
307 return (int) (((char*)op)-dest);
308}
309
310
311int FUNCTION_NAME (const char* source,
312 char* dest,
313 int inputSize
314#ifdef LIMITED_OUTPUT
315 ,int maxOutputSize
316#endif
317 )
318{
319 void* ctx = LZ4_createHC(source);
320 int result;
321 if (ctx==NULL) return 0;
322#ifdef LIMITED_OUTPUT
323 result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize, maxOutputSize);
324#else
325 result = COMBINED_NAME(FUNCTION_NAME,_continue) (ctx, source, dest, inputSize);
326#endif
327 LZ4_freeHC(ctx);
328
329 return result;
330}
331
332
333//****************************
334// Clean defines
335//****************************
336
337// Required defines
338#undef FUNCTION_NAME
339
340// Locally Generated
341#undef ENCODE_SEQUENCE
342#undef ENCODE_SEQUENCE_NAME
343
344// Optional defines
345#ifdef LIMITED_OUTPUT
346#undef LIMITED_OUTPUT
347#endif
348
349
350