1 | /* |
2 | LZ4 Encoder - Part of LZ4 compression algorithm |
3 | Copyright (C) 2011-2013, Yann Collet. |
4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
5 | |
6 | Redistribution and use in source and binary forms, with or without |
7 | modification, are permitted provided that the following conditions are |
8 | met: |
9 | |
10 | * Redistributions of source code must retain the above copyright |
11 | notice, this list of conditions and the following disclaimer. |
12 | * Redistributions in binary form must reproduce the above |
13 | copyright notice, this list of conditions and the following disclaimer |
14 | in the documentation and/or other materials provided with the |
15 | distribution. |
16 | |
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | |
29 | You can contact the author at : |
30 | - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html |
31 | - LZ4 source repository : http://code.google.com/p/lz4/ |
32 | */ |
33 | |
34 | /* lz4_encoder.h must be included into lz4.c |
35 | The objective of this file is to create a single LZ4 compression function source |
36 | which will be instanciated multiple times with minor variations |
37 | depending on a set of #define. |
38 | */ |
39 | |
40 | |
41 | |
42 | //**************************** |
43 | // Check required defines |
44 | //**************************** |
45 | |
46 | #ifndef FUNCTION_NAME |
47 | # error "FUNTION_NAME is not defined" |
48 | #endif |
49 | |
50 | |
51 | //**************************** |
52 | // Local definitions |
53 | //**************************** |
54 | |
55 | #ifdef COMPRESS_64K |
56 | # define HASHLOG (MEMORY_USAGE-1) |
57 | # define CURRENT_H_TYPE U16 |
58 | # define CURRENTBASE(base) const BYTE* const base = ip |
59 | #else |
60 | # define HASHLOG (MEMORY_USAGE-2) |
61 | # define CURRENT_H_TYPE HTYPE |
62 | # define CURRENTBASE(base) INITBASE(base) |
63 | #endif |
64 | |
65 | #define HASHTABLE_NBCELLS (1U<<HASHLOG) |
66 | #define LZ4_HASH(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASHLOG)) |
67 | #define LZ4_HASHVALUE(p) LZ4_HASH(A32(p)) |
68 | |
69 | |
70 | |
71 | //**************************** |
72 | // Function code |
73 | //**************************** |
74 | |
75 | int FUNCTION_NAME( |
76 | #ifdef USE_HEAPMEMORY |
77 | void* ctx, |
78 | #endif |
79 | const char* source, |
80 | char* dest, |
81 | int inputSize |
82 | #ifdef LIMITED_OUTPUT |
83 | ,int maxOutputSize |
84 | #endif |
85 | ) |
86 | { |
87 | #ifdef USE_HEAPMEMORY |
88 | CURRENT_H_TYPE* HashTable = (CURRENT_H_TYPE*)ctx; |
89 | #else |
90 | CURRENT_H_TYPE HashTable[HASHTABLE_NBCELLS] = {0}; |
91 | #endif |
92 | |
93 | const BYTE* ip = (BYTE*) source; |
94 | CURRENTBASE(base); |
95 | const BYTE* anchor = ip; |
96 | const BYTE* const iend = ip + inputSize; |
97 | const BYTE* const mflimit = iend - MFLIMIT; |
98 | #define matchlimit (iend - LASTLITERALS) |
99 | |
100 | BYTE* op = (BYTE*) dest; |
101 | #ifdef LIMITED_OUTPUT |
102 | BYTE* const oend = op + maxOutputSize; |
103 | #endif |
104 | |
105 | int length; |
106 | const int skipStrength = SKIPSTRENGTH; |
107 | U32 forwardH; |
108 | |
109 | |
110 | // Init |
111 | if (inputSize<MINLENGTH) goto _last_literals; |
112 | #ifdef COMPRESS_64K |
113 | if (inputSize>LZ4_64KLIMIT) return 0; // Size too large (not within 64K limit) |
114 | #endif |
115 | #ifdef USE_HEAPMEMORY |
116 | memset((void*)HashTable, 0, HASHTABLESIZE); |
117 | #endif |
118 | |
119 | // First Byte |
120 | HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); |
121 | ip++; forwardH = LZ4_HASHVALUE(ip); |
122 | |
123 | // Main Loop |
124 | for ( ; ; ) |
125 | { |
126 | int findMatchAttempts = (1U << skipStrength) + 3; |
127 | const BYTE* forwardIp = ip; |
128 | const BYTE* ref; |
129 | BYTE* token; |
130 | |
131 | // Find a match |
132 | do { |
133 | U32 h = forwardH; |
134 | int step = findMatchAttempts++ >> skipStrength; |
135 | ip = forwardIp; |
136 | forwardIp = ip + step; |
137 | |
138 | if unlikely(forwardIp > mflimit) { goto _last_literals; } |
139 | |
140 | forwardH = LZ4_HASHVALUE(forwardIp); |
141 | ref = base + HashTable[h]; |
142 | HashTable[h] = (CURRENT_H_TYPE)(ip - base); |
143 | |
144 | } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); |
145 | |
146 | // Catch up |
147 | while ((ip>anchor) && (ref>(BYTE*)source) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } |
148 | |
149 | // Encode Literal length |
150 | length = (int)(ip - anchor); |
151 | token = op++; |
152 | #ifdef LIMITED_OUTPUT |
153 | if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit |
154 | #endif |
155 | if (length>=(int)RUN_MASK) |
156 | { |
157 | int len = length-RUN_MASK; |
158 | *token=(RUN_MASK<<ML_BITS); |
159 | for(; len >= 255 ; len-=255) *op++ = 255; |
160 | *op++ = (BYTE)len; |
161 | } |
162 | else *token = (BYTE)(length<<ML_BITS); |
163 | |
164 | // Copy Literals |
165 | LZ4_BLINDCOPY(anchor, op, length); |
166 | |
167 | _next_match: |
168 | // Encode Offset |
169 | LZ4_WRITE_LITTLEENDIAN_16(op,(U16)(ip-ref)); |
170 | |
171 | // Start Counting |
172 | ip+=MINMATCH; ref+=MINMATCH; // MinMatch already verified |
173 | anchor = ip; |
174 | while likely(ip<matchlimit-(STEPSIZE-1)) |
175 | { |
176 | UARCH diff = AARCH(ref) ^ AARCH(ip); |
177 | if (!diff) { ip+=STEPSIZE; ref+=STEPSIZE; continue; } |
178 | ip += LZ4_NbCommonBytes(diff); |
179 | goto _endCount; |
180 | } |
181 | if (LZ4_ARCH64) if ((ip<(matchlimit-3)) && (A32(ref) == A32(ip))) { ip+=4; ref+=4; } |
182 | if ((ip<(matchlimit-1)) && (A16(ref) == A16(ip))) { ip+=2; ref+=2; } |
183 | if ((ip<matchlimit) && (*ref == *ip)) ip++; |
184 | _endCount: |
185 | |
186 | // Encode MatchLength |
187 | length = (int)(ip - anchor); |
188 | #ifdef LIMITED_OUTPUT |
189 | if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit |
190 | #endif |
191 | if (length>=(int)ML_MASK) |
192 | { |
193 | *token += ML_MASK; |
194 | length -= ML_MASK; |
195 | for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } |
196 | if (length >= 255) { length-=255; *op++ = 255; } |
197 | *op++ = (BYTE)length; |
198 | } |
199 | else *token += (BYTE)(length); |
200 | |
201 | // Test end of chunk |
202 | if (ip > mflimit) { anchor = ip; break; } |
203 | |
204 | // Fill table |
205 | HashTable[LZ4_HASHVALUE(ip-2)] = (CURRENT_H_TYPE)(ip - 2 - base); |
206 | |
207 | // Test next position |
208 | ref = base + HashTable[LZ4_HASHVALUE(ip)]; |
209 | HashTable[LZ4_HASHVALUE(ip)] = (CURRENT_H_TYPE)(ip - base); |
210 | if ((ref >= ip - MAX_DISTANCE) && (A32(ref) == A32(ip))) { token = op++; *token=0; goto _next_match; } |
211 | |
212 | // Prepare next loop |
213 | anchor = ip++; |
214 | forwardH = LZ4_HASHVALUE(ip); |
215 | } |
216 | |
217 | _last_literals: |
218 | // Encode Last Literals |
219 | { |
220 | int lastRun = (int)(iend - anchor); |
221 | #ifdef LIMITED_OUTPUT |
222 | if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit |
223 | #endif |
224 | if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun >= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } |
225 | else *op++ = (BYTE)(lastRun<<ML_BITS); |
226 | memcpy(op, anchor, iend - anchor); |
227 | op += iend-anchor; |
228 | } |
229 | |
230 | // End |
231 | return (int) (((char*)op)-dest); |
232 | } |
233 | |
234 | |
235 | |
236 | //**************************** |
237 | // Clean defines |
238 | //**************************** |
239 | |
240 | // Required defines |
241 | #undef FUNCTION_NAME |
242 | |
243 | // Locally Generated |
244 | #undef HASHLOG |
245 | #undef HASHTABLE_NBCELLS |
246 | #undef LZ4_HASH |
247 | #undef LZ4_HASHVALUE |
248 | #undef CURRENT_H_TYPE |
249 | #undef CURRENTBASE |
250 | |
251 | // Optional defines |
252 | #ifdef LIMITED_OUTPUT |
253 | #undef LIMITED_OUTPUT |
254 | #endif |
255 | |
256 | #ifdef USE_HEAPMEMORY |
257 | #undef USE_HEAPMEMORY |
258 | #endif |
259 | |