1 /*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
33 */
34
35 #include <sys/zfs_context.h>
36
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38 int osize);
39 static int LZ4_compressBound(int isize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41 int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43 int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45 int isize, int osize);
46
47 static kmem_cache_t *lz4_ctx_cache;
48
49 /*ARGSUSED*/
50 size_t
lz4_compress(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)51 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
52 {
53 uint32_t bufsiz;
54 char *dest = d_start;
55
56 ASSERT(d_len >= sizeof (bufsiz));
57
58 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
59 d_len - sizeof (bufsiz));
60
61 /* Signal an error if the compression routine returned zero. */
62 if (bufsiz == 0)
63 return (s_len);
64
65 /*
66 * Encode the compresed buffer size at the start. We'll need this in
67 * decompression to counter the effects of padding which might be
68 * added to the compressed buffer and which, if unhandled, would
69 * confuse the hell out of our decompression function.
70 */
71 *(uint32_t *)dest = BE_32(bufsiz);
72
73 return (bufsiz + sizeof (bufsiz));
74 }
75
76 /*ARGSUSED*/
77 int
lz4_decompress(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)78 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
79 {
80 const char *src = s_start;
81 uint32_t bufsiz = BE_IN32(src);
82
83 /* invalid compressed buffer size encoded at start */
84 if (bufsiz + sizeof (bufsiz) > s_len)
85 return (1);
86
87 /*
88 * Returns 0 on success (decompression function returned non-negative)
89 * and non-zero on failure (decompression function returned negative.
90 */
91 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
92 d_start, bufsiz, d_len) < 0);
93 }
94
95 /*
96 * LZ4 API Description:
97 *
98 * Simple Functions:
99 * real_LZ4_compress() :
100 * isize : is the input size. Max supported value is ~1.9GB
101 * return : the number of bytes written in buffer dest
102 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
103 * note : destination buffer must be already allocated.
104 * destination buffer must be sized to handle worst cases
105 * situations (input data not compressible) worst case size
106 * evaluation is provided by function LZ4_compressBound().
107 *
108 * Advanced Functions
109 *
110 * LZ4_compressBound() :
111 * Provides the maximum size that LZ4 may output in a "worst case"
112 * scenario (input data not compressible) primarily useful for memory
113 * allocation of output buffer.
114 *
115 * isize : is the input size. Max supported value is ~1.9GB
116 * return : maximum output size in a "worst case" scenario
117 * note : this function is limited by "int" range (2^31-1)
118 *
119 * LZ4_uncompress_unknownOutputSize() :
120 * isize : is the input size, therefore the compressed size
121 * maxOutputSize : is the size of the destination buffer (which must be
122 * already allocated)
123 * return : the number of bytes decoded in the destination buffer
124 * (necessarily <= maxOutputSize). If the source stream is
125 * malformed, the function will stop decoding and return a
126 * negative result, indicating the byte position of the faulty
127 * instruction. This function never writes beyond dest +
128 * maxOutputSize, and is therefore protected against malicious
129 * data packets.
130 * note : Destination buffer must be already allocated.
131 *
132 * LZ4_compressCtx() :
133 * This function explicitly handles the CTX memory structure.
134 *
135 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
136 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
137 * isn't valid.
138 *
139 * LZ4_compress64kCtx() :
140 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
141 * isize *Must* be <64KB, otherwise the output will be corrupted.
142 *
143 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
144 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
145 * isn't valid.
146 */
147
148 /*
149 * Tuning parameters
150 */
151
152 /*
153 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
154 * Lowering this value reduces memory usage. Reduced memory usage
155 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
156 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
157 * (examples : 12 -> 16KB ; 17 -> 512KB)
158 */
159 #define COMPRESSIONLEVEL 12
160
161 /*
162 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
163 * algorithm skip faster data segments considered "incompressible".
164 * This may decrease compression ratio dramatically, but will be
165 * faster on incompressible data. Increasing this value will make
166 * the algorithm search more before declaring a segment "incompressible".
167 * This could improve compression a bit, but will be slower on
168 * incompressible data. The default value (6) is recommended.
169 */
170 #define NOTCOMPRESSIBLE_CONFIRMATION 6
171
172 /*
173 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
174 * performance for big endian cpu, but the resulting compressed stream
175 * will be incompatible with little-endian CPU. You can set this option
176 * to 1 in situations where data will stay within closed environment.
177 * This option is useless on Little_Endian CPU (such as x86).
178 */
179 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
180
181 /*
182 * CPU Feature Detection
183 */
184
185 /* 32 or 64 bits ? */
186 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
187 defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
188 defined(__LP64__) || defined(_LP64))
189 #define LZ4_ARCH64 1
190 /*
191 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
192 * can spend 16k on the algorithm
193 */
194 /* FreeBSD: Use heap for all platforms for now */
195 #define STACKLIMIT 0
196 #else
197 #define LZ4_ARCH64 0
198 /*
199 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
200 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
201 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
202 */
203 #define STACKLIMIT 0
204 #endif
205
206 /*
207 * Little Endian or Big Endian?
208 * Note: overwrite the below #define if you know your architecture endianess.
209 */
210 #if BYTE_ORDER == BIG_ENDIAN
211 #define LZ4_BIG_ENDIAN 1
212 #else
213 /*
214 * Little Endian assumed. PDP Endian and other very rare endian format
215 * are unsupported.
216 */
217 #endif
218
219 /*
220 * Unaligned memory access is automatically enabled for "common" CPU,
221 * such as x86. For others CPU, the compiler will be more cautious, and
222 * insert extra code to ensure aligned access is respected. If you know
223 * your target CPU supports unaligned memory access, you may want to
224 * force this option manually to improve performance
225 */
226 #if defined(__ARM_FEATURE_UNALIGNED)
227 #define LZ4_FORCE_UNALIGNED_ACCESS 1
228 #endif
229
230 /*
231 * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
232 * gcc currently rely on libcompiler_rt.
233 *
234 * TODO: revisit this when situation changes.
235 */
236 #if defined(__sparc64__)
237 #define LZ4_FORCE_SW_BITCOUNT
238 #endif
239
240 /*
241 * Compiler Options
242 */
243 #if __STDC_VERSION__ >= 199901L /* C99 */
244 /* "restrict" is a known keyword */
245 #else
246 /* Disable restrict */
247 #define restrict
248 #endif
249
250 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
251 (((x) & 0xffu) << 8)))
252
253 #define expect(expr, value) (__builtin_expect((expr), (value)))
254
255 #if defined(likely)
256 #undef likely
257 #endif
258 #if defined(unlikely)
259 #undef unlikely
260 #endif
261
262 #define likely(expr) expect((expr) != 0, 1)
263 #define unlikely(expr) expect((expr) != 0, 0)
264
265 /* Basic types */
266 #define BYTE uint8_t
267 #define U16 uint16_t
268 #define U32 uint32_t
269 #define S32 int32_t
270 #define U64 uint64_t
271
272 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
273 #pragma pack(1)
274 #endif
275
276 typedef struct _U16_S {
277 U16 v;
278 } U16_S;
279 typedef struct _U32_S {
280 U32 v;
281 } U32_S;
282 typedef struct _U64_S {
283 U64 v;
284 } U64_S;
285
286 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
287 #pragma pack()
288 #endif
289
290 #define A64(x) (((U64_S *)(x))->v)
291 #define A32(x) (((U32_S *)(x))->v)
292 #define A16(x) (((U16_S *)(x))->v)
293
294 /*
295 * Constants
296 */
297 #define MINMATCH 4
298
299 #define HASH_LOG COMPRESSIONLEVEL
300 #define HASHTABLESIZE (1 << HASH_LOG)
301 #define HASH_MASK (HASHTABLESIZE - 1)
302
303 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
304 NOTCOMPRESSIBLE_CONFIRMATION : 2)
305
306 /*
307 * Defines if memory is allocated into the stack (local variable),
308 * or into the heap (kmem_alloc()).
309 */
310 #define HEAPMODE (HASH_LOG > STACKLIMIT)
311 #define COPYLENGTH 8
312 #define LASTLITERALS 5
313 #define MFLIMIT (COPYLENGTH + MINMATCH)
314 #define MINLENGTH (MFLIMIT + 1)
315
316 #define MAXD_LOG 16
317 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
318
319 #define ML_BITS 4
320 #define ML_MASK ((1U<<ML_BITS)-1)
321 #define RUN_BITS (8-ML_BITS)
322 #define RUN_MASK ((1U<<RUN_BITS)-1)
323
324
325 /*
326 * Architecture-specific macros
327 */
328 #if LZ4_ARCH64
329 #define STEPSIZE 8
330 #define UARCH U64
331 #define AARCH A64
332 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
333 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
334 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
335 #define HTYPE U32
336 #define INITBASE(base) const BYTE* const base = ip
337 #else /* !LZ4_ARCH64 */
338 #define STEPSIZE 4
339 #define UARCH U32
340 #define AARCH A32
341 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
342 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
343 #define LZ4_SECURECOPY LZ4_WILDCOPY
344 #define HTYPE const BYTE *
345 #define INITBASE(base) const int base = 0
346 #endif /* !LZ4_ARCH64 */
347
348 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
349 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
350 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
351 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
352 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
353 #else
354 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
355 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
356 #endif
357
358
359 /* Local structures */
360 struct refTables {
361 HTYPE hashTable[HASHTABLESIZE];
362 };
363
364
365 /* Macros */
366 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
367 HASH_LOG))
368 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
369 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
370 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
371 d = e; }
372
373
374 /* Private functions */
375 #if LZ4_ARCH64
376
377 static inline int
LZ4_NbCommonBytes(register U64 val)378 LZ4_NbCommonBytes(register U64 val)
379 {
380 #if defined(LZ4_BIG_ENDIAN)
381 #if !defined(LZ4_FORCE_SW_BITCOUNT)
382 return (__builtin_clzll(val) >> 3);
383 #else
384 int r;
385 if (!(val >> 32)) {
386 r = 4;
387 } else {
388 r = 0;
389 val >>= 32;
390 }
391 if (!(val >> 16)) {
392 r += 2;
393 val >>= 8;
394 } else {
395 val >>= 24;
396 }
397 r += (!val);
398 return (r);
399 #endif
400 #else
401 #if !defined(LZ4_FORCE_SW_BITCOUNT)
402 return (__builtin_ctzll(val) >> 3);
403 #else
404 static const int DeBruijnBytePos[64] =
405 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
406 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
407 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
408 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
409 };
410 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
411 58];
412 #endif
413 #endif
414 }
415
416 #else
417
418 static inline int
LZ4_NbCommonBytes(register U32 val)419 LZ4_NbCommonBytes(register U32 val)
420 {
421 #if defined(LZ4_BIG_ENDIAN)
422 #if !defined(LZ4_FORCE_SW_BITCOUNT)
423 return (__builtin_clz(val) >> 3);
424 #else
425 int r;
426 if (!(val >> 16)) {
427 r = 2;
428 val >>= 8;
429 } else {
430 r = 0;
431 val >>= 24;
432 }
433 r += (!val);
434 return (r);
435 #endif
436 #else
437 #if !defined(LZ4_FORCE_SW_BITCOUNT)
438 return (__builtin_ctz(val) >> 3);
439 #else
440 static const int DeBruijnBytePos[32] = {
441 0, 0, 3, 0, 3, 1, 3, 0,
442 3, 2, 2, 1, 3, 2, 0, 1,
443 3, 3, 1, 2, 2, 2, 2, 0,
444 3, 1, 2, 0, 1, 0, 1, 1
445 };
446 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
447 27];
448 #endif
449 #endif
450 }
451
452 #endif
453
454 /* Public functions */
455
456 static int
LZ4_compressBound(int isize)457 LZ4_compressBound(int isize)
458 {
459 return (isize + (isize / 255) + 16);
460 }
461
462 /* Compression functions */
463
464 /*ARGSUSED*/
465 static int
LZ4_compressCtx(void * ctx,const char * source,char * dest,int isize,int osize)466 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
467 int osize)
468 {
469 #if HEAPMODE
470 struct refTables *srt = (struct refTables *)ctx;
471 HTYPE *HashTable = (HTYPE *) (srt->hashTable);
472 #else
473 HTYPE HashTable[HASHTABLESIZE] = { 0 };
474 #endif
475
476 const BYTE *ip = (BYTE *) source;
477 INITBASE(base);
478 const BYTE *anchor = ip;
479 const BYTE *const iend = ip + isize;
480 const BYTE *const oend = (BYTE *) dest + osize;
481 const BYTE *const mflimit = iend - MFLIMIT;
482 #define matchlimit (iend - LASTLITERALS)
483
484 BYTE *op = (BYTE *) dest;
485
486 int len, length;
487 const int skipStrength = SKIPSTRENGTH;
488 U32 forwardH;
489
490
491 /* Init */
492 if (isize < MINLENGTH)
493 goto _last_literals;
494
495 /* First Byte */
496 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
497 ip++;
498 forwardH = LZ4_HASH_VALUE(ip);
499
500 /* Main Loop */
501 for (;;) {
502 int findMatchAttempts = (1U << skipStrength) + 3;
503 const BYTE *forwardIp = ip;
504 const BYTE *ref;
505 BYTE *token;
506
507 /* Find a match */
508 do {
509 U32 h = forwardH;
510 int step = findMatchAttempts++ >> skipStrength;
511 ip = forwardIp;
512 forwardIp = ip + step;
513
514 if unlikely(forwardIp > mflimit) {
515 goto _last_literals;
516 }
517
518 forwardH = LZ4_HASH_VALUE(forwardIp);
519 ref = base + HashTable[h];
520 HashTable[h] = ip - base;
521
522 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
523
524 /* Catch up */
525 while ((ip > anchor) && (ref > (BYTE *) source) &&
526 unlikely(ip[-1] == ref[-1])) {
527 ip--;
528 ref--;
529 }
530
531 /* Encode Literal length */
532 length = ip - anchor;
533 token = op++;
534
535 /* Check output limit */
536 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
537 (length >> 8) > oend)
538 return (0);
539
540 if (length >= (int)RUN_MASK) {
541 *token = (RUN_MASK << ML_BITS);
542 len = length - RUN_MASK;
543 for (; len > 254; len -= 255)
544 *op++ = 255;
545 *op++ = (BYTE)len;
546 } else
547 *token = (length << ML_BITS);
548
549 /* Copy Literals */
550 LZ4_BLINDCOPY(anchor, op, length);
551
552 _next_match:
553 /* Encode Offset */
554 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
555
556 /* Start Counting */
557 ip += MINMATCH;
558 ref += MINMATCH; /* MinMatch verified */
559 anchor = ip;
560 while likely(ip < matchlimit - (STEPSIZE - 1)) {
561 UARCH diff = AARCH(ref) ^ AARCH(ip);
562 if (!diff) {
563 ip += STEPSIZE;
564 ref += STEPSIZE;
565 continue;
566 }
567 ip += LZ4_NbCommonBytes(diff);
568 goto _endCount;
569 }
570 #if LZ4_ARCH64
571 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
572 ip += 4;
573 ref += 4;
574 }
575 #endif
576 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
577 ip += 2;
578 ref += 2;
579 }
580 if ((ip < matchlimit) && (*ref == *ip))
581 ip++;
582 _endCount:
583
584 /* Encode MatchLength */
585 len = (ip - anchor);
586 /* Check output limit */
587 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
588 return (0);
589 if (len >= (int)ML_MASK) {
590 *token += ML_MASK;
591 len -= ML_MASK;
592 for (; len > 509; len -= 510) {
593 *op++ = 255;
594 *op++ = 255;
595 }
596 if (len > 254) {
597 len -= 255;
598 *op++ = 255;
599 }
600 *op++ = (BYTE)len;
601 } else
602 *token += len;
603
604 /* Test end of chunk */
605 if (ip > mflimit) {
606 anchor = ip;
607 break;
608 }
609 /* Fill table */
610 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
611
612 /* Test next position */
613 ref = base + HashTable[LZ4_HASH_VALUE(ip)];
614 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
615 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
616 token = op++;
617 *token = 0;
618 goto _next_match;
619 }
620 /* Prepare next loop */
621 anchor = ip++;
622 forwardH = LZ4_HASH_VALUE(ip);
623 }
624
625 _last_literals:
626 /* Encode Last Literals */
627 {
628 int lastRun = iend - anchor;
629 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
630 oend)
631 return (0);
632 if (lastRun >= (int)RUN_MASK) {
633 *op++ = (RUN_MASK << ML_BITS);
634 lastRun -= RUN_MASK;
635 for (; lastRun > 254; lastRun -= 255) {
636 *op++ = 255;
637 }
638 *op++ = (BYTE)lastRun;
639 } else
640 *op++ = (lastRun << ML_BITS);
641 (void) memcpy(op, anchor, iend - anchor);
642 op += iend - anchor;
643 }
644
645 /* End */
646 return (int)(((char *)op) - dest);
647 }
648
649
650
651 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
652 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
653 #define HASHLOG64K (HASH_LOG + 1)
654 #define HASH64KTABLESIZE (1U << HASHLOG64K)
655 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
656 HASHLOG64K))
657 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
658
659 /*ARGSUSED*/
660 static int
LZ4_compress64kCtx(void * ctx,const char * source,char * dest,int isize,int osize)661 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
662 int osize)
663 {
664 #if HEAPMODE
665 struct refTables *srt = (struct refTables *)ctx;
666 U16 *HashTable = (U16 *) (srt->hashTable);
667 #else
668 U16 HashTable[HASH64KTABLESIZE] = { 0 };
669 #endif
670
671 const BYTE *ip = (BYTE *) source;
672 const BYTE *anchor = ip;
673 const BYTE *const base = ip;
674 const BYTE *const iend = ip + isize;
675 const BYTE *const oend = (BYTE *) dest + osize;
676 const BYTE *const mflimit = iend - MFLIMIT;
677 #define matchlimit (iend - LASTLITERALS)
678
679 BYTE *op = (BYTE *) dest;
680
681 int len, length;
682 const int skipStrength = SKIPSTRENGTH;
683 U32 forwardH;
684
685 /* Init */
686 if (isize < MINLENGTH)
687 goto _last_literals;
688
689 /* First Byte */
690 ip++;
691 forwardH = LZ4_HASH64K_VALUE(ip);
692
693 /* Main Loop */
694 for (;;) {
695 int findMatchAttempts = (1U << skipStrength) + 3;
696 const BYTE *forwardIp = ip;
697 const BYTE *ref;
698 BYTE *token;
699
700 /* Find a match */
701 do {
702 U32 h = forwardH;
703 int step = findMatchAttempts++ >> skipStrength;
704 ip = forwardIp;
705 forwardIp = ip + step;
706
707 if (forwardIp > mflimit) {
708 goto _last_literals;
709 }
710
711 forwardH = LZ4_HASH64K_VALUE(forwardIp);
712 ref = base + HashTable[h];
713 HashTable[h] = ip - base;
714
715 } while (A32(ref) != A32(ip));
716
717 /* Catch up */
718 while ((ip > anchor) && (ref > (BYTE *) source) &&
719 (ip[-1] == ref[-1])) {
720 ip--;
721 ref--;
722 }
723
724 /* Encode Literal length */
725 length = ip - anchor;
726 token = op++;
727
728 /* Check output limit */
729 if unlikely(op + length + (2 + 1 + LASTLITERALS) +
730 (length >> 8) > oend)
731 return (0);
732
733 if (length >= (int)RUN_MASK) {
734 *token = (RUN_MASK << ML_BITS);
735 len = length - RUN_MASK;
736 for (; len > 254; len -= 255)
737 *op++ = 255;
738 *op++ = (BYTE)len;
739 } else
740 *token = (length << ML_BITS);
741
742 /* Copy Literals */
743 LZ4_BLINDCOPY(anchor, op, length);
744
745 _next_match:
746 /* Encode Offset */
747 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
748
749 /* Start Counting */
750 ip += MINMATCH;
751 ref += MINMATCH; /* MinMatch verified */
752 anchor = ip;
753 while (ip < matchlimit - (STEPSIZE - 1)) {
754 UARCH diff = AARCH(ref) ^ AARCH(ip);
755 if (!diff) {
756 ip += STEPSIZE;
757 ref += STEPSIZE;
758 continue;
759 }
760 ip += LZ4_NbCommonBytes(diff);
761 goto _endCount;
762 }
763 #if LZ4_ARCH64
764 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
765 ip += 4;
766 ref += 4;
767 }
768 #endif
769 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
770 ip += 2;
771 ref += 2;
772 }
773 if ((ip < matchlimit) && (*ref == *ip))
774 ip++;
775 _endCount:
776
777 /* Encode MatchLength */
778 len = (ip - anchor);
779 /* Check output limit */
780 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
781 return (0);
782 if (len >= (int)ML_MASK) {
783 *token += ML_MASK;
784 len -= ML_MASK;
785 for (; len > 509; len -= 510) {
786 *op++ = 255;
787 *op++ = 255;
788 }
789 if (len > 254) {
790 len -= 255;
791 *op++ = 255;
792 }
793 *op++ = (BYTE)len;
794 } else
795 *token += len;
796
797 /* Test end of chunk */
798 if (ip > mflimit) {
799 anchor = ip;
800 break;
801 }
802 /* Fill table */
803 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
804
805 /* Test next position */
806 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
807 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
808 if (A32(ref) == A32(ip)) {
809 token = op++;
810 *token = 0;
811 goto _next_match;
812 }
813 /* Prepare next loop */
814 anchor = ip++;
815 forwardH = LZ4_HASH64K_VALUE(ip);
816 }
817
818 _last_literals:
819 /* Encode Last Literals */
820 {
821 int lastRun = iend - anchor;
822 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
823 oend)
824 return (0);
825 if (lastRun >= (int)RUN_MASK) {
826 *op++ = (RUN_MASK << ML_BITS);
827 lastRun -= RUN_MASK;
828 for (; lastRun > 254; lastRun -= 255)
829 *op++ = 255;
830 *op++ = (BYTE)lastRun;
831 } else
832 *op++ = (lastRun << ML_BITS);
833 (void) memcpy(op, anchor, iend - anchor);
834 op += iend - anchor;
835 }
836
837 /* End */
838 return (int)(((char *)op) - dest);
839 }
840
841 static int
real_LZ4_compress(const char * source,char * dest,int isize,int osize)842 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
843 {
844 #if HEAPMODE
845 void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
846 int result;
847
848 /*
849 * out of kernel memory, gently fall through - this will disable
850 * compression in zio_compress_data
851 */
852 if (ctx == NULL)
853 return (0);
854
855 bzero(ctx, sizeof(struct refTables));
856 if (isize < LZ4_64KLIMIT)
857 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
858 else
859 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
860
861 kmem_cache_free(lz4_ctx_cache, ctx);
862 return (result);
863 #else
864 if (isize < (int)LZ4_64KLIMIT)
865 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
866 return (LZ4_compressCtx(NULL, source, dest, isize, osize));
867 #endif
868 }
869
870 /* Decompression functions */
871
872 /*
873 * Note: The decoding functionLZ4_uncompress_unknownOutputSize() is safe
874 * against "buffer overflow" attack type. They will never write nor
875 * read outside of the provided output buffers.
876 * LZ4_uncompress_unknownOutputSize() also insures that it will never
877 * read outside of the input buffer. A corrupted input will produce
878 * an error result, a negative int, indicating the position of the
879 * error within input stream.
880 */
881
882 static int
LZ4_uncompress_unknownOutputSize(const char * source,char * dest,int isize,int maxOutputSize)883 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
884 int maxOutputSize)
885 {
886 /* Local Variables */
887 const BYTE *restrict ip = (const BYTE *) source;
888 const BYTE *const iend = ip + isize;
889 const BYTE *ref;
890
891 BYTE *op = (BYTE *) dest;
892 BYTE *const oend = op + maxOutputSize;
893 BYTE *cpy;
894
895 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
896 #if LZ4_ARCH64
897 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
898 #endif
899
900 /* Main Loop */
901 while (ip < iend) {
902 unsigned token;
903 size_t length;
904
905 /* get runlength */
906 token = *ip++;
907 if ((length = (token >> ML_BITS)) == RUN_MASK) {
908 int s = 255;
909 while ((ip < iend) && (s == 255)) {
910 s = *ip++;
911 length += s;
912 }
913 }
914 /* copy literals */
915 cpy = op + length;
916 if ((cpy > oend - COPYLENGTH) ||
917 (ip + length > iend - COPYLENGTH)) {
918 if (cpy > oend)
919 /* Error: writes beyond output buffer */
920 goto _output_error;
921 if (ip + length != iend)
922 /*
923 * Error: LZ4 format requires to consume all
924 * input at this stage
925 */
926 goto _output_error;
927 (void) memcpy(op, ip, length);
928 op += length;
929 /* Necessarily EOF, due to parsing restrictions */
930 break;
931 }
932 LZ4_WILDCOPY(ip, op, cpy);
933 ip -= (op - cpy);
934 op = cpy;
935
936 /* get offset */
937 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
938 ip += 2;
939 if (ref < (BYTE * const) dest)
940 /*
941 * Error: offset creates reference outside of
942 * destination buffer
943 */
944 goto _output_error;
945
946 /* get matchlength */
947 if ((length = (token & ML_MASK)) == ML_MASK) {
948 while (ip < iend) {
949 int s = *ip++;
950 length += s;
951 if (s == 255)
952 continue;
953 break;
954 }
955 }
956 /* copy repeated sequence */
957 if unlikely(op - ref < STEPSIZE) {
958 #if LZ4_ARCH64
959 size_t dec64 = dec64table[op-ref];
960 #else
961 const int dec64 = 0;
962 #endif
963 op[0] = ref[0];
964 op[1] = ref[1];
965 op[2] = ref[2];
966 op[3] = ref[3];
967 op += 4;
968 ref += 4;
969 ref -= dec32table[op-ref];
970 A32(op) = A32(ref);
971 op += STEPSIZE - 4;
972 ref -= dec64;
973 } else {
974 LZ4_COPYSTEP(ref, op);
975 }
976 cpy = op + length - (STEPSIZE - 4);
977 if (cpy > oend - COPYLENGTH) {
978 if (cpy > oend)
979 /*
980 * Error: request to write outside of
981 * destination buffer
982 */
983 goto _output_error;
984 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
985 while (op < cpy)
986 *op++ = *ref++;
987 op = cpy;
988 if (op == oend)
989 /*
990 * Check EOF (should never happen, since
991 * last 5 bytes are supposed to be literals)
992 */
993 goto _output_error;
994 continue;
995 }
996 LZ4_SECURECOPY(ref, op, cpy);
997 op = cpy; /* correction */
998 }
999
1000 /* end of decoding */
1001 return (int)(((char *)op) - dest);
1002
1003 /* write overflow error detected */
1004 _output_error:
1005 return (int)(-(((char *)ip) - source));
1006 }
1007
1008 extern void
lz4_init(void)1009 lz4_init(void)
1010 {
1011
1012 #if HEAPMODE
1013 lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
1014 0, NULL, NULL, NULL, NULL, NULL, 0);
1015 #endif
1016 }
1017
1018 extern void
lz4_fini(void)1019 lz4_fini(void)
1020 {
1021
1022 #if HEAPMODE
1023 kmem_cache_destroy(lz4_ctx_cache);
1024 #endif
1025 }
1026