1 /*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
33 */
34
35 #include <sys/zfs_context.h>
36 #include <sys/zio_compress.h>
37
38 static int real_LZ4_compress(const char *source, char *dest, int isize,
39 int osize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41 int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43 int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45 int isize, int osize);
46
47 static void *lz4_alloc(int flags);
48 static void lz4_free(void *ctx);
49
50 size_t
lz4_compress_zfs(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)51 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
52 size_t d_len, int n)
53 {
54 (void) n;
55 uint32_t bufsiz;
56 char *dest = d_start;
57
58 ASSERT(d_len >= sizeof (bufsiz));
59
60 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
61 d_len - sizeof (bufsiz));
62
63 /* Signal an error if the compression routine returned zero. */
64 if (bufsiz == 0)
65 return (s_len);
66
67 /*
68 * The exact compressed size is needed by the decompression routine,
69 * so it is stored at the start of the buffer. Note that this may be
70 * less than the compressed block size, which is rounded up to a
71 * multiple of 1<<ashift.
72 */
73 *(uint32_t *)dest = BE_32(bufsiz);
74
75 return (bufsiz + sizeof (bufsiz));
76 }
77
78 int
lz4_decompress_zfs(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)79 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
80 size_t d_len, int n)
81 {
82 (void) n;
83 const char *src = s_start;
84 uint32_t bufsiz = BE_IN32(src);
85
86 /* invalid compressed buffer size encoded at start */
87 if (bufsiz + sizeof (bufsiz) > s_len)
88 return (1);
89
90 /*
91 * Returns 0 on success (decompression function returned non-negative)
92 * and non-zero on failure (decompression function returned negative).
93 */
94 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
95 d_start, bufsiz, d_len) < 0);
96 }
97
98 /*
99 * LZ4 API Description:
100 *
101 * Simple Functions:
102 * real_LZ4_compress() :
103 * isize : is the input size. Max supported value is ~1.9GB
104 * return : the number of bytes written in buffer dest
105 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
106 * note : destination buffer must be already allocated.
107 * destination buffer must be sized to handle worst cases
108 * situations (input data not compressible) worst case size
109 * evaluation is provided by function LZ4_compressBound().
110 *
111 * real_LZ4_uncompress() :
112 * osize : is the output size, therefore the original size
113 * return : the number of bytes read in the source buffer.
114 * If the source stream is malformed, the function will stop
115 * decoding and return a negative result, indicating the byte
116 * position of the faulty instruction. This function never
117 * writes beyond dest + osize, and is therefore protected
118 * against malicious data packets.
119 * note : destination buffer must be already allocated
120 * note : real_LZ4_uncompress() is not used in ZFS so its code
121 * is not present here.
122 *
123 * Advanced Functions
124 *
125 * LZ4_compressBound() :
126 * Provides the maximum size that LZ4 may output in a "worst case"
127 * scenario (input data not compressible) primarily useful for memory
128 * allocation of output buffer.
129 *
130 * isize : is the input size. Max supported value is ~1.9GB
131 * return : maximum output size in a "worst case" scenario
132 * note : this function is limited by "int" range (2^31-1)
133 *
134 * LZ4_uncompress_unknownOutputSize() :
135 * isize : is the input size, therefore the compressed size
136 * maxOutputSize : is the size of the destination buffer (which must be
137 * already allocated)
138 * return : the number of bytes decoded in the destination buffer
139 * (necessarily <= maxOutputSize). If the source stream is
140 * malformed, the function will stop decoding and return a
141 * negative result, indicating the byte position of the faulty
142 * instruction. This function never writes beyond dest +
143 * maxOutputSize, and is therefore protected against malicious
144 * data packets.
145 * note : Destination buffer must be already allocated.
146 * This version is slightly slower than real_LZ4_uncompress()
147 *
148 * LZ4_compressCtx() :
149 * This function explicitly handles the CTX memory structure.
150 *
151 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
152 * by the caller (either on the stack or using kmem_cache_alloc). Passing
153 * NULL isn't valid.
154 *
155 * LZ4_compress64kCtx() :
156 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
157 * isize *Must* be <64KB, otherwise the output will be corrupted.
158 *
159 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
160 * by the caller (either on the stack or using kmem_cache_alloc). Passing
161 * NULL isn't valid.
162 */
163
164 /*
165 * Tuning parameters
166 */
167
168 /*
169 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
170 * Lowering this value reduces memory usage. Reduced memory usage
171 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
172 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
173 * (examples : 12 -> 16KB ; 17 -> 512KB)
174 */
175 #define COMPRESSIONLEVEL 12
176
177 /*
178 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
179 * algorithm skip faster data segments considered "incompressible".
180 * This may decrease compression ratio dramatically, but will be
181 * faster on incompressible data. Increasing this value will make
182 * the algorithm search more before declaring a segment "incompressible".
183 * This could improve compression a bit, but will be slower on
184 * incompressible data. The default value (6) is recommended.
185 */
186 #define NOTCOMPRESSIBLE_CONFIRMATION 6
187
188 /*
189 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
190 * performance for big endian cpu, but the resulting compressed stream
191 * will be incompatible with little-endian CPU. You can set this option
192 * to 1 in situations where data will stay within closed environment.
193 * This option is useless on Little_Endian CPU (such as x86).
194 */
195 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
196
197 /*
198 * CPU Feature Detection
199 */
200
201 /* 32 or 64 bits ? */
202 #if defined(_LP64)
203 #define LZ4_ARCH64 1
204 #else
205 #define LZ4_ARCH64 0
206 #endif
207
208 /*
209 * Little Endian or Big Endian?
210 * Note: overwrite the below #define if you know your architecture endianness.
211 */
212 #if defined(_ZFS_BIG_ENDIAN)
213 #define LZ4_BIG_ENDIAN 1
214 #else
215 /*
216 * Little Endian assumed. PDP Endian and other very rare endian format
217 * are unsupported.
218 */
219 #undef LZ4_BIG_ENDIAN
220 #endif
221
222 /*
223 * Unaligned memory access is automatically enabled for "common" CPU,
224 * such as x86. For others CPU, the compiler will be more cautious, and
225 * insert extra code to ensure aligned access is respected. If you know
226 * your target CPU supports unaligned memory access, you may want to
227 * force this option manually to improve performance
228 */
229 #if defined(__ARM_FEATURE_UNALIGNED)
230 #define LZ4_FORCE_UNALIGNED_ACCESS 1
231 #endif
232
233 /*
234 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
235 * kernel
236 * Linux : we can use GCC's __builtin_ctz family of builtins in the
237 * kernel
238 */
239 #undef LZ4_FORCE_SW_BITCOUNT
240 #if defined(__sparc)
241 #define LZ4_FORCE_SW_BITCOUNT
242 #endif
243
244 /*
245 * Compiler Options
246 */
247 /* Disable restrict */
248 #define restrict
249
250 /*
251 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
252 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
253 */
254 #ifdef GCC_VERSION
255 #undef GCC_VERSION
256 #endif
257
258 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
259
260 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
261 #define expect(expr, value) (__builtin_expect((expr), (value)))
262 #else
263 #define expect(expr, value) (expr)
264 #endif
265
266 #ifndef likely
267 #define likely(expr) expect((expr) != 0, 1)
268 #endif
269
270 #ifndef unlikely
271 #define unlikely(expr) expect((expr) != 0, 0)
272 #endif
273
274 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
275 (((x) & 0xffu) << 8)))
276
277 /* Basic types */
278 #define BYTE uint8_t
279 #define U16 uint16_t
280 #define U32 uint32_t
281 #define S32 int32_t
282 #define U64 uint64_t
283
284 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
285 #pragma pack(1)
286 #endif
287
288 typedef struct _U16_S {
289 U16 v;
290 } U16_S;
291 typedef struct _U32_S {
292 U32 v;
293 } U32_S;
294 typedef struct _U64_S {
295 U64 v;
296 } U64_S;
297
298 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
299 #pragma pack()
300 #endif
301
302 #define A64(x) (((U64_S *)(x))->v)
303 #define A32(x) (((U32_S *)(x))->v)
304 #define A16(x) (((U16_S *)(x))->v)
305
306 /*
307 * Constants
308 */
309 #define MINMATCH 4
310
311 #define HASH_LOG COMPRESSIONLEVEL
312 #define HASHTABLESIZE (1 << HASH_LOG)
313 #define HASH_MASK (HASHTABLESIZE - 1)
314
315 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
316 NOTCOMPRESSIBLE_CONFIRMATION : 2)
317
318 #define COPYLENGTH 8
319 #define LASTLITERALS 5
320 #define MFLIMIT (COPYLENGTH + MINMATCH)
321 #define MINLENGTH (MFLIMIT + 1)
322
323 #define MAXD_LOG 16
324 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
325
326 #define ML_BITS 4
327 #define ML_MASK ((1U<<ML_BITS)-1)
328 #define RUN_BITS (8-ML_BITS)
329 #define RUN_MASK ((1U<<RUN_BITS)-1)
330
331
332 /*
333 * Architecture-specific macros
334 */
335 #if LZ4_ARCH64
336 #define STEPSIZE 8
337 #define UARCH U64
338 #define AARCH A64
339 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
340 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
341 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
342 #define HTYPE U32
343 #define INITBASE(base) const BYTE* const base = ip
344 #else /* !LZ4_ARCH64 */
345 #define STEPSIZE 4
346 #define UARCH U32
347 #define AARCH A32
348 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
349 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
350 #define LZ4_SECURECOPY LZ4_WILDCOPY
351 #define HTYPE const BYTE *
352 #define INITBASE(base) const int base = 0
353 #endif /* !LZ4_ARCH64 */
354
355 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
356 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
357 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
358 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
359 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
360 #else
361 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
362 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
363 #endif
364
365
366 /* Local structures */
367 struct refTables {
368 HTYPE hashTable[HASHTABLESIZE];
369 };
370
371
372 /* Macros */
373 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
374 HASH_LOG))
375 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
376 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
377 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
378 d = e; }
379
380
381 /* Private functions */
382 #if LZ4_ARCH64
383
384 static inline int
LZ4_NbCommonBytes(register U64 val)385 LZ4_NbCommonBytes(register U64 val)
386 {
387 #if defined(LZ4_BIG_ENDIAN)
388 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
389 !defined(LZ4_FORCE_SW_BITCOUNT)
390 return (__builtin_clzll(val) >> 3);
391 #else
392 int r;
393 if (!(val >> 32)) {
394 r = 4;
395 } else {
396 r = 0;
397 val >>= 32;
398 }
399 if (!(val >> 16)) {
400 r += 2;
401 val >>= 8;
402 } else {
403 val >>= 24;
404 }
405 r += (!val);
406 return (r);
407 #endif
408 #else
409 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
410 !defined(LZ4_FORCE_SW_BITCOUNT)
411 return (__builtin_ctzll(val) >> 3);
412 #else
413 static const int DeBruijnBytePos[64] =
414 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
415 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
416 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
417 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
418 };
419 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
420 58];
421 #endif
422 #endif
423 }
424
425 #else
426
427 static inline int
LZ4_NbCommonBytes(register U32 val)428 LZ4_NbCommonBytes(register U32 val)
429 {
430 #if defined(LZ4_BIG_ENDIAN)
431 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
432 !defined(LZ4_FORCE_SW_BITCOUNT)
433 return (__builtin_clz(val) >> 3);
434 #else
435 int r;
436 if (!(val >> 16)) {
437 r = 2;
438 val >>= 8;
439 } else {
440 r = 0;
441 val >>= 24;
442 }
443 r += (!val);
444 return (r);
445 #endif
446 #else
447 #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
448 !defined(LZ4_FORCE_SW_BITCOUNT)
449 return (__builtin_ctz(val) >> 3);
450 #else
451 static const int DeBruijnBytePos[32] = {
452 0, 0, 3, 0, 3, 1, 3, 0,
453 3, 2, 2, 1, 3, 2, 0, 1,
454 3, 3, 1, 2, 2, 2, 2, 0,
455 3, 1, 2, 0, 1, 0, 1, 1
456 };
457 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
458 27];
459 #endif
460 #endif
461 }
462
463 #endif
464
465 /* Compression functions */
466
467 static int
LZ4_compressCtx(void * ctx,const char * source,char * dest,int isize,int osize)468 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
469 int osize)
470 {
471 struct refTables *srt = (struct refTables *)ctx;
472 HTYPE *HashTable = (HTYPE *) (srt->hashTable);
473
474 const BYTE *ip = (BYTE *) source;
475 INITBASE(base);
476 const BYTE *anchor = ip;
477 const BYTE *const iend = ip + isize;
478 const BYTE *const oend = (BYTE *) dest + osize;
479 const BYTE *const mflimit = iend - MFLIMIT;
480 #define matchlimit (iend - LASTLITERALS)
481
482 BYTE *op = (BYTE *) dest;
483
484 int len, length;
485 const int skipStrength = SKIPSTRENGTH;
486 U32 forwardH;
487
488
489 /* Init */
490 if (isize < MINLENGTH)
491 goto _last_literals;
492
493 /* First Byte */
494 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
495 ip++;
496 forwardH = LZ4_HASH_VALUE(ip);
497
498 /* Main Loop */
499 for (;;) {
500 int findMatchAttempts = (1U << skipStrength) + 3;
501 const BYTE *forwardIp = ip;
502 const BYTE *ref;
503 BYTE *token;
504
505 /* Find a match */
506 do {
507 U32 h = forwardH;
508 int step = findMatchAttempts++ >> skipStrength;
509 ip = forwardIp;
510 forwardIp = ip + step;
511
512 if (unlikely(forwardIp > mflimit)) {
513 goto _last_literals;
514 }
515
516 forwardH = LZ4_HASH_VALUE(forwardIp);
517 ref = base + HashTable[h];
518 HashTable[h] = ip - base;
519
520 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
521
522 /* Catch up */
523 while ((ip > anchor) && (ref > (BYTE *) source) &&
524 unlikely(ip[-1] == ref[-1])) {
525 ip--;
526 ref--;
527 }
528
529 /* Encode Literal length */
530 length = ip - anchor;
531 token = op++;
532
533 /* Check output limit */
534 if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
535 (length >> 8) > oend))
536 return (0);
537
538 if (length >= (int)RUN_MASK) {
539 *token = (RUN_MASK << ML_BITS);
540 len = length - RUN_MASK;
541 for (; len > 254; len -= 255)
542 *op++ = 255;
543 *op++ = (BYTE)len;
544 } else
545 *token = (length << ML_BITS);
546
547 /* Copy Literals */
548 LZ4_BLINDCOPY(anchor, op, length);
549
550 _next_match:
551 /* Encode Offset */
552 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
553
554 /* Start Counting */
555 ip += MINMATCH;
556 ref += MINMATCH; /* MinMatch verified */
557 anchor = ip;
558 while (likely(ip < matchlimit - (STEPSIZE - 1))) {
559 UARCH diff = AARCH(ref) ^ AARCH(ip);
560 if (!diff) {
561 ip += STEPSIZE;
562 ref += STEPSIZE;
563 continue;
564 }
565 ip += LZ4_NbCommonBytes(diff);
566 goto _endCount;
567 }
568 #if LZ4_ARCH64
569 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
570 ip += 4;
571 ref += 4;
572 }
573 #endif
574 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
575 ip += 2;
576 ref += 2;
577 }
578 if ((ip < matchlimit) && (*ref == *ip))
579 ip++;
580 _endCount:
581
582 /* Encode MatchLength */
583 len = (ip - anchor);
584 /* Check output limit */
585 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
586 return (0);
587 if (len >= (int)ML_MASK) {
588 *token += ML_MASK;
589 len -= ML_MASK;
590 for (; len > 509; len -= 510) {
591 *op++ = 255;
592 *op++ = 255;
593 }
594 if (len > 254) {
595 len -= 255;
596 *op++ = 255;
597 }
598 *op++ = (BYTE)len;
599 } else
600 *token += len;
601
602 /* Test end of chunk */
603 if (ip > mflimit) {
604 anchor = ip;
605 break;
606 }
607 /* Fill table */
608 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
609
610 /* Test next position */
611 ref = base + HashTable[LZ4_HASH_VALUE(ip)];
612 HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
613 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
614 token = op++;
615 *token = 0;
616 goto _next_match;
617 }
618 /* Prepare next loop */
619 anchor = ip++;
620 forwardH = LZ4_HASH_VALUE(ip);
621 }
622
623 _last_literals:
624 /* Encode Last Literals */
625 {
626 int lastRun = iend - anchor;
627 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
628 oend)
629 return (0);
630 if (lastRun >= (int)RUN_MASK) {
631 *op++ = (RUN_MASK << ML_BITS);
632 lastRun -= RUN_MASK;
633 for (; lastRun > 254; lastRun -= 255) {
634 *op++ = 255;
635 }
636 *op++ = (BYTE)lastRun;
637 } else
638 *op++ = (lastRun << ML_BITS);
639 (void) memcpy(op, anchor, iend - anchor);
640 op += iend - anchor;
641 }
642
643 /* End */
644 return (int)(((char *)op) - dest);
645 }
646
647
648
649 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
650 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
651 #define HASHLOG64K (HASH_LOG + 1)
652 #define HASH64KTABLESIZE (1U << HASHLOG64K)
653 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
654 HASHLOG64K))
655 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
656
657 static int
LZ4_compress64kCtx(void * ctx,const char * source,char * dest,int isize,int osize)658 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
659 int osize)
660 {
661 struct refTables *srt = (struct refTables *)ctx;
662 U16 *HashTable = (U16 *) (srt->hashTable);
663
664 const BYTE *ip = (BYTE *) source;
665 const BYTE *anchor = ip;
666 const BYTE *const base = ip;
667 const BYTE *const iend = ip + isize;
668 const BYTE *const oend = (BYTE *) dest + osize;
669 const BYTE *const mflimit = iend - MFLIMIT;
670 #define matchlimit (iend - LASTLITERALS)
671
672 BYTE *op = (BYTE *) dest;
673
674 int len, length;
675 const int skipStrength = SKIPSTRENGTH;
676 U32 forwardH;
677
678 /* Init */
679 if (isize < MINLENGTH)
680 goto _last_literals;
681
682 /* First Byte */
683 ip++;
684 forwardH = LZ4_HASH64K_VALUE(ip);
685
686 /* Main Loop */
687 for (;;) {
688 int findMatchAttempts = (1U << skipStrength) + 3;
689 const BYTE *forwardIp = ip;
690 const BYTE *ref;
691 BYTE *token;
692
693 /* Find a match */
694 do {
695 U32 h = forwardH;
696 int step = findMatchAttempts++ >> skipStrength;
697 ip = forwardIp;
698 forwardIp = ip + step;
699
700 if (forwardIp > mflimit) {
701 goto _last_literals;
702 }
703
704 forwardH = LZ4_HASH64K_VALUE(forwardIp);
705 ref = base + HashTable[h];
706 HashTable[h] = ip - base;
707
708 } while (A32(ref) != A32(ip));
709
710 /* Catch up */
711 while ((ip > anchor) && (ref > (BYTE *) source) &&
712 (ip[-1] == ref[-1])) {
713 ip--;
714 ref--;
715 }
716
717 /* Encode Literal length */
718 length = ip - anchor;
719 token = op++;
720
721 /* Check output limit */
722 if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
723 (length >> 8) > oend))
724 return (0);
725
726 if (length >= (int)RUN_MASK) {
727 *token = (RUN_MASK << ML_BITS);
728 len = length - RUN_MASK;
729 for (; len > 254; len -= 255)
730 *op++ = 255;
731 *op++ = (BYTE)len;
732 } else
733 *token = (length << ML_BITS);
734
735 /* Copy Literals */
736 LZ4_BLINDCOPY(anchor, op, length);
737
738 _next_match:
739 /* Encode Offset */
740 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
741
742 /* Start Counting */
743 ip += MINMATCH;
744 ref += MINMATCH; /* MinMatch verified */
745 anchor = ip;
746 while (ip < matchlimit - (STEPSIZE - 1)) {
747 UARCH diff = AARCH(ref) ^ AARCH(ip);
748 if (!diff) {
749 ip += STEPSIZE;
750 ref += STEPSIZE;
751 continue;
752 }
753 ip += LZ4_NbCommonBytes(diff);
754 goto _endCount;
755 }
756 #if LZ4_ARCH64
757 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
758 ip += 4;
759 ref += 4;
760 }
761 #endif
762 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
763 ip += 2;
764 ref += 2;
765 }
766 if ((ip < matchlimit) && (*ref == *ip))
767 ip++;
768 _endCount:
769
770 /* Encode MatchLength */
771 len = (ip - anchor);
772 /* Check output limit */
773 if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
774 return (0);
775 if (len >= (int)ML_MASK) {
776 *token += ML_MASK;
777 len -= ML_MASK;
778 for (; len > 509; len -= 510) {
779 *op++ = 255;
780 *op++ = 255;
781 }
782 if (len > 254) {
783 len -= 255;
784 *op++ = 255;
785 }
786 *op++ = (BYTE)len;
787 } else
788 *token += len;
789
790 /* Test end of chunk */
791 if (ip > mflimit) {
792 anchor = ip;
793 break;
794 }
795 /* Fill table */
796 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
797
798 /* Test next position */
799 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
800 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
801 if (A32(ref) == A32(ip)) {
802 token = op++;
803 *token = 0;
804 goto _next_match;
805 }
806 /* Prepare next loop */
807 anchor = ip++;
808 forwardH = LZ4_HASH64K_VALUE(ip);
809 }
810
811 _last_literals:
812 /* Encode Last Literals */
813 {
814 int lastRun = iend - anchor;
815 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
816 oend)
817 return (0);
818 if (lastRun >= (int)RUN_MASK) {
819 *op++ = (RUN_MASK << ML_BITS);
820 lastRun -= RUN_MASK;
821 for (; lastRun > 254; lastRun -= 255)
822 *op++ = 255;
823 *op++ = (BYTE)lastRun;
824 } else
825 *op++ = (lastRun << ML_BITS);
826 (void) memcpy(op, anchor, iend - anchor);
827 op += iend - anchor;
828 }
829
830 /* End */
831 return (int)(((char *)op) - dest);
832 }
833
834 static int
real_LZ4_compress(const char * source,char * dest,int isize,int osize)835 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
836 {
837 void *ctx;
838 int result;
839
840 ctx = lz4_alloc(KM_SLEEP);
841
842 /*
843 * out of kernel memory, gently fall through - this will disable
844 * compression in zio_compress_data
845 */
846 if (ctx == NULL)
847 return (0);
848
849 memset(ctx, 0, sizeof (struct refTables));
850
851 if (isize < LZ4_64KLIMIT)
852 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
853 else
854 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
855
856 lz4_free(ctx);
857 return (result);
858 }
859
860 /* Decompression functions */
861
862 /*
863 * Note: The decoding functions real_LZ4_uncompress() and
864 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
865 * attack type. They will never write nor read outside of the provided
866 * output buffers. LZ4_uncompress_unknownOutputSize() also insures that
867 * it will never read outside of the input buffer. A corrupted input
868 * will produce an error result, a negative int, indicating the position
869 * of the error within input stream.
870 *
871 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
872 * its code is not present here.
873 */
874
875 static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
876 #if LZ4_ARCH64
877 static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
878 #endif
879
880 static int
LZ4_uncompress_unknownOutputSize(const char * source,char * dest,int isize,int maxOutputSize)881 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
882 int maxOutputSize)
883 {
884 /* Local Variables */
885 const BYTE *restrict ip = (const BYTE *) source;
886 const BYTE *const iend = ip + isize;
887 const BYTE *ref;
888
889 BYTE *op = (BYTE *) dest;
890 BYTE *const oend = op + maxOutputSize;
891 BYTE *cpy;
892
893 /* Main Loop */
894 while (ip < iend) {
895 unsigned token;
896 size_t length;
897
898 /* get runlength */
899 token = *ip++;
900 if ((length = (token >> ML_BITS)) == RUN_MASK) {
901 int s = 255;
902 while ((ip < iend) && (s == 255)) {
903 s = *ip++;
904 if (unlikely(length > (size_t)(length + s)))
905 goto _output_error;
906 length += s;
907 }
908 }
909 /* copy literals */
910 cpy = op + length;
911 /* CORNER-CASE: cpy might overflow. */
912 if (cpy < op)
913 goto _output_error; /* cpy was overflowed, bail! */
914 if ((cpy > oend - COPYLENGTH) ||
915 (ip + length > iend - COPYLENGTH)) {
916 if (cpy > oend)
917 /* Error: writes beyond output buffer */
918 goto _output_error;
919 if (ip + length != iend)
920 /*
921 * Error: LZ4 format requires to consume all
922 * input at this stage
923 */
924 goto _output_error;
925 (void) memcpy(op, ip, length);
926 op += length;
927 /* Necessarily EOF, due to parsing restrictions */
928 break;
929 }
930 LZ4_WILDCOPY(ip, op, cpy);
931 ip -= (op - cpy);
932 op = cpy;
933
934 /* get offset */
935 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
936 ip += 2;
937 if (ref < (BYTE * const) dest)
938 /*
939 * Error: offset creates reference outside of
940 * destination buffer
941 */
942 goto _output_error;
943
944 /* get matchlength */
945 if ((length = (token & ML_MASK)) == ML_MASK) {
946 while (ip < iend) {
947 int s = *ip++;
948 if (unlikely(length > (size_t)(length + s)))
949 goto _output_error;
950 length += s;
951 if (s == 255)
952 continue;
953 break;
954 }
955 }
956 /* copy repeated sequence */
957 if (unlikely(op - ref < STEPSIZE)) {
958 #if LZ4_ARCH64
959 int dec64 = dec64table[op - ref];
960 #else
961 const int dec64 = 0;
962 #endif
963 op[0] = ref[0];
964 op[1] = ref[1];
965 op[2] = ref[2];
966 op[3] = ref[3];
967 op += 4;
968 ref += 4;
969 ref -= dec32table[op - ref];
970 A32(op) = A32(ref);
971 op += STEPSIZE - 4;
972 ref -= dec64;
973 } else {
974 LZ4_COPYSTEP(ref, op);
975 }
976 cpy = op + length - (STEPSIZE - 4);
977 if (cpy > oend - COPYLENGTH) {
978 if (cpy > oend)
979 /*
980 * Error: request to write outside of
981 * destination buffer
982 */
983 goto _output_error;
984 #if LZ4_ARCH64
985 if ((ref + COPYLENGTH) > oend)
986 #else
987 if ((ref + COPYLENGTH) > oend ||
988 (op + COPYLENGTH) > oend)
989 #endif
990 goto _output_error;
991 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
992 while (op < cpy)
993 *op++ = *ref++;
994 op = cpy;
995 if (op == oend)
996 /*
997 * Check EOF (should never happen, since
998 * last 5 bytes are supposed to be literals)
999 */
1000 goto _output_error;
1001 continue;
1002 }
1003 LZ4_SECURECOPY(ref, op, cpy);
1004 op = cpy; /* correction */
1005 }
1006
1007 /* end of decoding */
1008 return (int)(((char *)op) - dest);
1009
1010 /* write overflow error detected */
1011 _output_error:
1012 return (-1);
1013 }
1014
1015 #ifdef __FreeBSD__
1016 /*
1017 * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
1018 * Should struct refTables get resized this may need to be revisited, hence
1019 * compiler-time asserts.
1020 */
1021 _Static_assert(sizeof(struct refTables) <= 16384,
1022 "refTables too big for malloc");
1023 _Static_assert((sizeof(struct refTables) % 4096) == 0,
1024 "refTables not a multiple of page size");
1025 #else
1026 #define ZFS_LZ4_USE_CACHE
1027 #endif
1028
1029 #ifdef ZFS_LZ4_USE_CACHE
1030 static kmem_cache_t *lz4_cache;
1031
1032 void
lz4_init(void)1033 lz4_init(void)
1034 {
1035 lz4_cache = kmem_cache_create("lz4_cache",
1036 sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
1037 }
1038
1039 void
lz4_fini(void)1040 lz4_fini(void)
1041 {
1042 if (lz4_cache) {
1043 kmem_cache_destroy(lz4_cache);
1044 lz4_cache = NULL;
1045 }
1046 }
1047
1048 static void *
lz4_alloc(int flags)1049 lz4_alloc(int flags)
1050 {
1051 ASSERT(lz4_cache != NULL);
1052 return (kmem_cache_alloc(lz4_cache, flags));
1053 }
1054
1055 static void
lz4_free(void * ctx)1056 lz4_free(void *ctx)
1057 {
1058 kmem_cache_free(lz4_cache, ctx);
1059 }
1060 #else
1061 void
lz4_init(void)1062 lz4_init(void)
1063 {
1064 }
1065
1066 void
lz4_fini(void)1067 lz4_fini(void)
1068 {
1069 }
1070
1071 static void *
lz4_alloc(int flags)1072 lz4_alloc(int flags)
1073 {
1074 return (kmem_alloc(sizeof (struct refTables), flags));
1075 }
1076
1077 static void
lz4_free(void * ctx)1078 lz4_free(void *ctx)
1079 {
1080 kmem_free(ctx, sizeof (struct refTables));
1081 }
1082 #endif
1083