xref: /freebsd-13-stable/sys/contrib/openzfs/module/zfs/lz4.c (revision bd2e56ef47d5a2c69f6f8e092abfd27a4d469d1e)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/zfs_context.h>
36 #include <sys/zio_compress.h>
37 
38 static int real_LZ4_compress(const char *source, char *dest, int isize,
39     int osize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41     int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43     int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45     int isize, int osize);
46 
47 static void *lz4_alloc(int flags);
48 static void lz4_free(void *ctx);
49 
50 size_t
lz4_compress_zfs(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)51 lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
52     size_t d_len, int n)
53 {
54 	(void) n;
55 	uint32_t bufsiz;
56 	char *dest = d_start;
57 
58 	ASSERT(d_len >= sizeof (bufsiz));
59 
60 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
61 	    d_len - sizeof (bufsiz));
62 
63 	/* Signal an error if the compression routine returned zero. */
64 	if (bufsiz == 0)
65 		return (s_len);
66 
67 	/*
68 	 * The exact compressed size is needed by the decompression routine,
69 	 * so it is stored at the start of the buffer. Note that this may be
70 	 * less than the compressed block size, which is rounded up to a
71 	 * multiple of 1<<ashift.
72 	 */
73 	*(uint32_t *)dest = BE_32(bufsiz);
74 
75 	return (bufsiz + sizeof (bufsiz));
76 }
77 
78 int
lz4_decompress_zfs(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)79 lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
80     size_t d_len, int n)
81 {
82 	(void) n;
83 	const char *src = s_start;
84 	uint32_t bufsiz = BE_IN32(src);
85 
86 	/* invalid compressed buffer size encoded at start */
87 	if (bufsiz + sizeof (bufsiz) > s_len)
88 		return (1);
89 
90 	/*
91 	 * Returns 0 on success (decompression function returned non-negative)
92 	 * and non-zero on failure (decompression function returned negative).
93 	 */
94 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
95 	    d_start, bufsiz, d_len) < 0);
96 }
97 
98 /*
99  * LZ4 API Description:
100  *
101  * Simple Functions:
102  * real_LZ4_compress() :
103  * 	isize  : is the input size. Max supported value is ~1.9GB
104  * 	return : the number of bytes written in buffer dest
105  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
106  * 	note : destination buffer must be already allocated.
107  * 		destination buffer must be sized to handle worst cases
108  * 		situations (input data not compressible) worst case size
109  * 		evaluation is provided by function LZ4_compressBound().
110  *
111  * real_LZ4_uncompress() :
112  * 	osize  : is the output size, therefore the original size
113  * 	return : the number of bytes read in the source buffer.
114  * 		If the source stream is malformed, the function will stop
115  * 		decoding and return a negative result, indicating the byte
116  * 		position of the faulty instruction. This function never
117  * 		writes beyond dest + osize, and is therefore protected
118  * 		against malicious data packets.
119  * 	note : destination buffer must be already allocated
120  *	note : real_LZ4_uncompress() is not used in ZFS so its code
121  *	       is not present here.
122  *
123  * Advanced Functions
124  *
125  * LZ4_compressBound() :
126  * 	Provides the maximum size that LZ4 may output in a "worst case"
127  * 	scenario (input data not compressible) primarily useful for memory
128  * 	allocation of output buffer.
129  *
130  * 	isize  : is the input size. Max supported value is ~1.9GB
131  * 	return : maximum output size in a "worst case" scenario
132  * 	note : this function is limited by "int" range (2^31-1)
133  *
134  * LZ4_uncompress_unknownOutputSize() :
135  * 	isize  : is the input size, therefore the compressed size
136  * 	maxOutputSize : is the size of the destination buffer (which must be
137  * 		already allocated)
138  * 	return : the number of bytes decoded in the destination buffer
139  * 		(necessarily <= maxOutputSize). If the source stream is
140  * 		malformed, the function will stop decoding and return a
141  * 		negative result, indicating the byte position of the faulty
142  * 		instruction. This function never writes beyond dest +
143  * 		maxOutputSize, and is therefore protected against malicious
144  * 		data packets.
145  * 	note   : Destination buffer must be already allocated.
146  *		This version is slightly slower than real_LZ4_uncompress()
147  *
148  * LZ4_compressCtx() :
149  * 	This function explicitly handles the CTX memory structure.
150  *
151  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
152  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
153  * 	NULL isn't valid.
154  *
155  * LZ4_compress64kCtx() :
156  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
157  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
158  *
159  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
160  * 	by the caller (either on the stack or using kmem_cache_alloc). Passing
161  * 	NULL isn't valid.
162  */
163 
164 /*
165  * Tuning parameters
166  */
167 
168 /*
169  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
170  *	 Lowering this value reduces memory usage. Reduced memory usage
171  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
172  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
173  *	(examples : 12 -> 16KB ; 17 -> 512KB)
174  */
175 #define	COMPRESSIONLEVEL 12
176 
177 /*
178  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
179  *	algorithm skip faster data segments considered "incompressible".
180  *	This may decrease compression ratio dramatically, but will be
181  *	faster on incompressible data. Increasing this value will make
182  *	the algorithm search more before declaring a segment "incompressible".
183  *	This could improve compression a bit, but will be slower on
184  *	incompressible data. The default value (6) is recommended.
185  */
186 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
187 
188 /*
189  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
190  * performance for big endian cpu, but the resulting compressed stream
191  * will be incompatible with little-endian CPU. You can set this option
192  * to 1 in situations where data will stay within closed environment.
193  * This option is useless on Little_Endian CPU (such as x86).
194  */
195 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
196 
197 /*
198  * CPU Feature Detection
199  */
200 
201 /* 32 or 64 bits ? */
202 #if defined(_LP64)
203 #define	LZ4_ARCH64 1
204 #else
205 #define	LZ4_ARCH64 0
206 #endif
207 
208 /*
209  * Little Endian or Big Endian?
210  * Note: overwrite the below #define if you know your architecture endianness.
211  */
212 #if defined(_ZFS_BIG_ENDIAN)
213 #define	LZ4_BIG_ENDIAN 1
214 #else
215 /*
216  * Little Endian assumed. PDP Endian and other very rare endian format
217  * are unsupported.
218  */
219 #undef LZ4_BIG_ENDIAN
220 #endif
221 
222 /*
223  * Unaligned memory access is automatically enabled for "common" CPU,
224  * such as x86. For others CPU, the compiler will be more cautious, and
225  * insert extra code to ensure aligned access is respected. If you know
226  * your target CPU supports unaligned memory access, you may want to
227  * force this option manually to improve performance
228  */
229 #if defined(__ARM_FEATURE_UNALIGNED)
230 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
231 #endif
232 
233 /*
234  * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
235  * kernel
236  * Linux : we can use GCC's __builtin_ctz family of builtins in the
237  * kernel
238  */
239 #undef	LZ4_FORCE_SW_BITCOUNT
240 #if defined(__sparc)
241 #define	LZ4_FORCE_SW_BITCOUNT
242 #endif
243 
244 /*
245  * Compiler Options
246  */
247 /* Disable restrict */
248 #define	restrict
249 
250 /*
251  * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
252  * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
253  */
254 #ifdef GCC_VERSION
255 #undef GCC_VERSION
256 #endif
257 
258 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
259 
260 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
261 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
262 #else
263 #define	expect(expr, value)    (expr)
264 #endif
265 
266 #ifndef likely
267 #define	likely(expr)	expect((expr) != 0, 1)
268 #endif
269 
270 #ifndef unlikely
271 #define	unlikely(expr)	expect((expr) != 0, 0)
272 #endif
273 
274 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
275 	(((x) & 0xffu) << 8)))
276 
277 /* Basic types */
278 #define	BYTE	uint8_t
279 #define	U16	uint16_t
280 #define	U32	uint32_t
281 #define	S32	int32_t
282 #define	U64	uint64_t
283 
284 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
285 #pragma pack(1)
286 #endif
287 
288 typedef struct _U16_S {
289 	U16 v;
290 } U16_S;
291 typedef struct _U32_S {
292 	U32 v;
293 } U32_S;
294 typedef struct _U64_S {
295 	U64 v;
296 } U64_S;
297 
298 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
299 #pragma pack()
300 #endif
301 
302 #define	A64(x) (((U64_S *)(x))->v)
303 #define	A32(x) (((U32_S *)(x))->v)
304 #define	A16(x) (((U16_S *)(x))->v)
305 
306 /*
307  * Constants
308  */
309 #define	MINMATCH 4
310 
311 #define	HASH_LOG COMPRESSIONLEVEL
312 #define	HASHTABLESIZE (1 << HASH_LOG)
313 #define	HASH_MASK (HASHTABLESIZE - 1)
314 
315 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
316 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
317 
318 #define	COPYLENGTH 8
319 #define	LASTLITERALS 5
320 #define	MFLIMIT (COPYLENGTH + MINMATCH)
321 #define	MINLENGTH (MFLIMIT + 1)
322 
323 #define	MAXD_LOG 16
324 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
325 
326 #define	ML_BITS 4
327 #define	ML_MASK ((1U<<ML_BITS)-1)
328 #define	RUN_BITS (8-ML_BITS)
329 #define	RUN_MASK ((1U<<RUN_BITS)-1)
330 
331 
332 /*
333  * Architecture-specific macros
334  */
335 #if LZ4_ARCH64
336 #define	STEPSIZE 8
337 #define	UARCH U64
338 #define	AARCH A64
339 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
340 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
341 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
342 #define	HTYPE U32
343 #define	INITBASE(base)		const BYTE* const base = ip
344 #else /* !LZ4_ARCH64 */
345 #define	STEPSIZE 4
346 #define	UARCH U32
347 #define	AARCH A32
348 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
349 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
350 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
351 #define	HTYPE const BYTE *
352 #define	INITBASE(base)		const int base = 0
353 #endif /* !LZ4_ARCH64 */
354 
355 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
356 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
357 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
358 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
359 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
360 #else
361 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
362 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
363 #endif
364 
365 
366 /* Local structures */
367 struct refTables {
368 	HTYPE hashTable[HASHTABLESIZE];
369 };
370 
371 
372 /* Macros */
373 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
374 	HASH_LOG))
375 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
376 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
377 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
378 	d = e; }
379 
380 
381 /* Private functions */
382 #if LZ4_ARCH64
383 
384 static inline int
LZ4_NbCommonBytes(register U64 val)385 LZ4_NbCommonBytes(register U64 val)
386 {
387 #if defined(LZ4_BIG_ENDIAN)
388 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
389 	!defined(LZ4_FORCE_SW_BITCOUNT)
390 	return (__builtin_clzll(val) >> 3);
391 #else
392 	int r;
393 	if (!(val >> 32)) {
394 		r = 4;
395 	} else {
396 		r = 0;
397 		val >>= 32;
398 	}
399 	if (!(val >> 16)) {
400 		r += 2;
401 		val >>= 8;
402 	} else {
403 		val >>= 24;
404 	}
405 	r += (!val);
406 	return (r);
407 #endif
408 #else
409 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
410 	!defined(LZ4_FORCE_SW_BITCOUNT)
411 	return (__builtin_ctzll(val) >> 3);
412 #else
413 	static const int DeBruijnBytePos[64] =
414 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
415 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
416 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
417 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
418 	};
419 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
420 	    58];
421 #endif
422 #endif
423 }
424 
425 #else
426 
427 static inline int
LZ4_NbCommonBytes(register U32 val)428 LZ4_NbCommonBytes(register U32 val)
429 {
430 #if defined(LZ4_BIG_ENDIAN)
431 #if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
432 	!defined(LZ4_FORCE_SW_BITCOUNT)
433 	return (__builtin_clz(val) >> 3);
434 #else
435 	int r;
436 	if (!(val >> 16)) {
437 		r = 2;
438 		val >>= 8;
439 	} else {
440 		r = 0;
441 		val >>= 24;
442 	}
443 	r += (!val);
444 	return (r);
445 #endif
446 #else
447 #if defined(__GNUC__) && (GCC_VERSION >= 304) && \
448 	!defined(LZ4_FORCE_SW_BITCOUNT)
449 	return (__builtin_ctz(val) >> 3);
450 #else
451 	static const int DeBruijnBytePos[32] = {
452 		0, 0, 3, 0, 3, 1, 3, 0,
453 		3, 2, 2, 1, 3, 2, 0, 1,
454 		3, 3, 1, 2, 2, 2, 2, 0,
455 		3, 1, 2, 0, 1, 0, 1, 1
456 	};
457 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
458 	    27];
459 #endif
460 #endif
461 }
462 
463 #endif
464 
465 /* Compression functions */
466 
467 static int
LZ4_compressCtx(void * ctx,const char * source,char * dest,int isize,int osize)468 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
469     int osize)
470 {
471 	struct refTables *srt = (struct refTables *)ctx;
472 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
473 
474 	const BYTE *ip = (BYTE *) source;
475 	INITBASE(base);
476 	const BYTE *anchor = ip;
477 	const BYTE *const iend = ip + isize;
478 	const BYTE *const oend = (BYTE *) dest + osize;
479 	const BYTE *const mflimit = iend - MFLIMIT;
480 #define	matchlimit (iend - LASTLITERALS)
481 
482 	BYTE *op = (BYTE *) dest;
483 
484 	int len, length;
485 	const int skipStrength = SKIPSTRENGTH;
486 	U32 forwardH;
487 
488 
489 	/* Init */
490 	if (isize < MINLENGTH)
491 		goto _last_literals;
492 
493 	/* First Byte */
494 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
495 	ip++;
496 	forwardH = LZ4_HASH_VALUE(ip);
497 
498 	/* Main Loop */
499 	for (;;) {
500 		int findMatchAttempts = (1U << skipStrength) + 3;
501 		const BYTE *forwardIp = ip;
502 		const BYTE *ref;
503 		BYTE *token;
504 
505 		/* Find a match */
506 		do {
507 			U32 h = forwardH;
508 			int step = findMatchAttempts++ >> skipStrength;
509 			ip = forwardIp;
510 			forwardIp = ip + step;
511 
512 			if (unlikely(forwardIp > mflimit)) {
513 				goto _last_literals;
514 			}
515 
516 			forwardH = LZ4_HASH_VALUE(forwardIp);
517 			ref = base + HashTable[h];
518 			HashTable[h] = ip - base;
519 
520 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
521 
522 		/* Catch up */
523 		while ((ip > anchor) && (ref > (BYTE *) source) &&
524 		    unlikely(ip[-1] == ref[-1])) {
525 			ip--;
526 			ref--;
527 		}
528 
529 		/* Encode Literal length */
530 		length = ip - anchor;
531 		token = op++;
532 
533 		/* Check output limit */
534 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
535 		    (length >> 8) > oend))
536 			return (0);
537 
538 		if (length >= (int)RUN_MASK) {
539 			*token = (RUN_MASK << ML_BITS);
540 			len = length - RUN_MASK;
541 			for (; len > 254; len -= 255)
542 				*op++ = 255;
543 			*op++ = (BYTE)len;
544 		} else
545 			*token = (length << ML_BITS);
546 
547 		/* Copy Literals */
548 		LZ4_BLINDCOPY(anchor, op, length);
549 
550 		_next_match:
551 		/* Encode Offset */
552 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
553 
554 		/* Start Counting */
555 		ip += MINMATCH;
556 		ref += MINMATCH;	/* MinMatch verified */
557 		anchor = ip;
558 		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
559 			UARCH diff = AARCH(ref) ^ AARCH(ip);
560 			if (!diff) {
561 				ip += STEPSIZE;
562 				ref += STEPSIZE;
563 				continue;
564 			}
565 			ip += LZ4_NbCommonBytes(diff);
566 			goto _endCount;
567 		}
568 #if LZ4_ARCH64
569 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
570 			ip += 4;
571 			ref += 4;
572 		}
573 #endif
574 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
575 			ip += 2;
576 			ref += 2;
577 		}
578 		if ((ip < matchlimit) && (*ref == *ip))
579 			ip++;
580 		_endCount:
581 
582 		/* Encode MatchLength */
583 		len = (ip - anchor);
584 		/* Check output limit */
585 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
586 			return (0);
587 		if (len >= (int)ML_MASK) {
588 			*token += ML_MASK;
589 			len -= ML_MASK;
590 			for (; len > 509; len -= 510) {
591 				*op++ = 255;
592 				*op++ = 255;
593 			}
594 			if (len > 254) {
595 				len -= 255;
596 				*op++ = 255;
597 			}
598 			*op++ = (BYTE)len;
599 		} else
600 			*token += len;
601 
602 		/* Test end of chunk */
603 		if (ip > mflimit) {
604 			anchor = ip;
605 			break;
606 		}
607 		/* Fill table */
608 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
609 
610 		/* Test next position */
611 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
612 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
613 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
614 			token = op++;
615 			*token = 0;
616 			goto _next_match;
617 		}
618 		/* Prepare next loop */
619 		anchor = ip++;
620 		forwardH = LZ4_HASH_VALUE(ip);
621 	}
622 
623 	_last_literals:
624 	/* Encode Last Literals */
625 	{
626 		int lastRun = iend - anchor;
627 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
628 		    oend)
629 			return (0);
630 		if (lastRun >= (int)RUN_MASK) {
631 			*op++ = (RUN_MASK << ML_BITS);
632 			lastRun -= RUN_MASK;
633 			for (; lastRun > 254; lastRun -= 255) {
634 				*op++ = 255;
635 			}
636 			*op++ = (BYTE)lastRun;
637 		} else
638 			*op++ = (lastRun << ML_BITS);
639 		(void) memcpy(op, anchor, iend - anchor);
640 		op += iend - anchor;
641 	}
642 
643 	/* End */
644 	return (int)(((char *)op) - dest);
645 }
646 
647 
648 
649 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
650 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
651 #define	HASHLOG64K (HASH_LOG + 1)
652 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
653 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
654 	HASHLOG64K))
655 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
656 
657 static int
LZ4_compress64kCtx(void * ctx,const char * source,char * dest,int isize,int osize)658 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
659     int osize)
660 {
661 	struct refTables *srt = (struct refTables *)ctx;
662 	U16 *HashTable = (U16 *) (srt->hashTable);
663 
664 	const BYTE *ip = (BYTE *) source;
665 	const BYTE *anchor = ip;
666 	const BYTE *const base = ip;
667 	const BYTE *const iend = ip + isize;
668 	const BYTE *const oend = (BYTE *) dest + osize;
669 	const BYTE *const mflimit = iend - MFLIMIT;
670 #define	matchlimit (iend - LASTLITERALS)
671 
672 	BYTE *op = (BYTE *) dest;
673 
674 	int len, length;
675 	const int skipStrength = SKIPSTRENGTH;
676 	U32 forwardH;
677 
678 	/* Init */
679 	if (isize < MINLENGTH)
680 		goto _last_literals;
681 
682 	/* First Byte */
683 	ip++;
684 	forwardH = LZ4_HASH64K_VALUE(ip);
685 
686 	/* Main Loop */
687 	for (;;) {
688 		int findMatchAttempts = (1U << skipStrength) + 3;
689 		const BYTE *forwardIp = ip;
690 		const BYTE *ref;
691 		BYTE *token;
692 
693 		/* Find a match */
694 		do {
695 			U32 h = forwardH;
696 			int step = findMatchAttempts++ >> skipStrength;
697 			ip = forwardIp;
698 			forwardIp = ip + step;
699 
700 			if (forwardIp > mflimit) {
701 				goto _last_literals;
702 			}
703 
704 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
705 			ref = base + HashTable[h];
706 			HashTable[h] = ip - base;
707 
708 		} while (A32(ref) != A32(ip));
709 
710 		/* Catch up */
711 		while ((ip > anchor) && (ref > (BYTE *) source) &&
712 		    (ip[-1] == ref[-1])) {
713 			ip--;
714 			ref--;
715 		}
716 
717 		/* Encode Literal length */
718 		length = ip - anchor;
719 		token = op++;
720 
721 		/* Check output limit */
722 		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
723 		    (length >> 8) > oend))
724 			return (0);
725 
726 		if (length >= (int)RUN_MASK) {
727 			*token = (RUN_MASK << ML_BITS);
728 			len = length - RUN_MASK;
729 			for (; len > 254; len -= 255)
730 				*op++ = 255;
731 			*op++ = (BYTE)len;
732 		} else
733 			*token = (length << ML_BITS);
734 
735 		/* Copy Literals */
736 		LZ4_BLINDCOPY(anchor, op, length);
737 
738 		_next_match:
739 		/* Encode Offset */
740 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
741 
742 		/* Start Counting */
743 		ip += MINMATCH;
744 		ref += MINMATCH;	/* MinMatch verified */
745 		anchor = ip;
746 		while (ip < matchlimit - (STEPSIZE - 1)) {
747 			UARCH diff = AARCH(ref) ^ AARCH(ip);
748 			if (!diff) {
749 				ip += STEPSIZE;
750 				ref += STEPSIZE;
751 				continue;
752 			}
753 			ip += LZ4_NbCommonBytes(diff);
754 			goto _endCount;
755 		}
756 #if LZ4_ARCH64
757 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
758 			ip += 4;
759 			ref += 4;
760 		}
761 #endif
762 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
763 			ip += 2;
764 			ref += 2;
765 		}
766 		if ((ip < matchlimit) && (*ref == *ip))
767 			ip++;
768 		_endCount:
769 
770 		/* Encode MatchLength */
771 		len = (ip - anchor);
772 		/* Check output limit */
773 		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
774 			return (0);
775 		if (len >= (int)ML_MASK) {
776 			*token += ML_MASK;
777 			len -= ML_MASK;
778 			for (; len > 509; len -= 510) {
779 				*op++ = 255;
780 				*op++ = 255;
781 			}
782 			if (len > 254) {
783 				len -= 255;
784 				*op++ = 255;
785 			}
786 			*op++ = (BYTE)len;
787 		} else
788 			*token += len;
789 
790 		/* Test end of chunk */
791 		if (ip > mflimit) {
792 			anchor = ip;
793 			break;
794 		}
795 		/* Fill table */
796 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
797 
798 		/* Test next position */
799 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
800 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
801 		if (A32(ref) == A32(ip)) {
802 			token = op++;
803 			*token = 0;
804 			goto _next_match;
805 		}
806 		/* Prepare next loop */
807 		anchor = ip++;
808 		forwardH = LZ4_HASH64K_VALUE(ip);
809 	}
810 
811 	_last_literals:
812 	/* Encode Last Literals */
813 	{
814 		int lastRun = iend - anchor;
815 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
816 		    oend)
817 			return (0);
818 		if (lastRun >= (int)RUN_MASK) {
819 			*op++ = (RUN_MASK << ML_BITS);
820 			lastRun -= RUN_MASK;
821 			for (; lastRun > 254; lastRun -= 255)
822 				*op++ = 255;
823 			*op++ = (BYTE)lastRun;
824 		} else
825 			*op++ = (lastRun << ML_BITS);
826 		(void) memcpy(op, anchor, iend - anchor);
827 		op += iend - anchor;
828 	}
829 
830 	/* End */
831 	return (int)(((char *)op) - dest);
832 }
833 
834 static int
real_LZ4_compress(const char * source,char * dest,int isize,int osize)835 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
836 {
837 	void *ctx;
838 	int result;
839 
840 	ctx = lz4_alloc(KM_SLEEP);
841 
842 	/*
843 	 * out of kernel memory, gently fall through - this will disable
844 	 * compression in zio_compress_data
845 	 */
846 	if (ctx == NULL)
847 		return (0);
848 
849 	memset(ctx, 0, sizeof (struct refTables));
850 
851 	if (isize < LZ4_64KLIMIT)
852 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
853 	else
854 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
855 
856 	lz4_free(ctx);
857 	return (result);
858 }
859 
860 /* Decompression functions */
861 
862 /*
863  * Note: The decoding functions real_LZ4_uncompress() and
864  *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
865  *	attack type. They will never write nor read outside of the provided
866  *	output buffers. LZ4_uncompress_unknownOutputSize() also insures that
867  *	it will never read outside of the input buffer. A corrupted input
868  *	will produce an error result, a negative int, indicating the position
869  *	of the error within input stream.
870  *
871  * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
872  *	its code is not present here.
873  */
874 
875 static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
876 #if LZ4_ARCH64
877 static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
878 #endif
879 
880 static int
LZ4_uncompress_unknownOutputSize(const char * source,char * dest,int isize,int maxOutputSize)881 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
882     int maxOutputSize)
883 {
884 	/* Local Variables */
885 	const BYTE *restrict ip = (const BYTE *) source;
886 	const BYTE *const iend = ip + isize;
887 	const BYTE *ref;
888 
889 	BYTE *op = (BYTE *) dest;
890 	BYTE *const oend = op + maxOutputSize;
891 	BYTE *cpy;
892 
893 	/* Main Loop */
894 	while (ip < iend) {
895 		unsigned token;
896 		size_t length;
897 
898 		/* get runlength */
899 		token = *ip++;
900 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
901 			int s = 255;
902 			while ((ip < iend) && (s == 255)) {
903 				s = *ip++;
904 				if (unlikely(length > (size_t)(length + s)))
905 					goto _output_error;
906 				length += s;
907 			}
908 		}
909 		/* copy literals */
910 		cpy = op + length;
911 		/* CORNER-CASE: cpy might overflow. */
912 		if (cpy < op)
913 			goto _output_error;	/* cpy was overflowed, bail! */
914 		if ((cpy > oend - COPYLENGTH) ||
915 		    (ip + length > iend - COPYLENGTH)) {
916 			if (cpy > oend)
917 				/* Error: writes beyond output buffer */
918 				goto _output_error;
919 			if (ip + length != iend)
920 				/*
921 				 * Error: LZ4 format requires to consume all
922 				 * input at this stage
923 				 */
924 				goto _output_error;
925 			(void) memcpy(op, ip, length);
926 			op += length;
927 			/* Necessarily EOF, due to parsing restrictions */
928 			break;
929 		}
930 		LZ4_WILDCOPY(ip, op, cpy);
931 		ip -= (op - cpy);
932 		op = cpy;
933 
934 		/* get offset */
935 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
936 		ip += 2;
937 		if (ref < (BYTE * const) dest)
938 			/*
939 			 * Error: offset creates reference outside of
940 			 * destination buffer
941 			 */
942 			goto _output_error;
943 
944 		/* get matchlength */
945 		if ((length = (token & ML_MASK)) == ML_MASK) {
946 			while (ip < iend) {
947 				int s = *ip++;
948 				if (unlikely(length > (size_t)(length + s)))
949 					goto _output_error;
950 				length += s;
951 				if (s == 255)
952 					continue;
953 				break;
954 			}
955 		}
956 		/* copy repeated sequence */
957 		if (unlikely(op - ref < STEPSIZE)) {
958 #if LZ4_ARCH64
959 			int dec64 = dec64table[op - ref];
960 #else
961 			const int dec64 = 0;
962 #endif
963 			op[0] = ref[0];
964 			op[1] = ref[1];
965 			op[2] = ref[2];
966 			op[3] = ref[3];
967 			op += 4;
968 			ref += 4;
969 			ref -= dec32table[op - ref];
970 			A32(op) = A32(ref);
971 			op += STEPSIZE - 4;
972 			ref -= dec64;
973 		} else {
974 			LZ4_COPYSTEP(ref, op);
975 		}
976 		cpy = op + length - (STEPSIZE - 4);
977 		if (cpy > oend - COPYLENGTH) {
978 			if (cpy > oend)
979 				/*
980 				 * Error: request to write outside of
981 				 * destination buffer
982 				 */
983 				goto _output_error;
984 #if LZ4_ARCH64
985 			if ((ref + COPYLENGTH) > oend)
986 #else
987 			if ((ref + COPYLENGTH) > oend ||
988 			    (op + COPYLENGTH) > oend)
989 #endif
990 				goto _output_error;
991 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
992 			while (op < cpy)
993 				*op++ = *ref++;
994 			op = cpy;
995 			if (op == oend)
996 				/*
997 				 * Check EOF (should never happen, since
998 				 * last 5 bytes are supposed to be literals)
999 				 */
1000 				goto _output_error;
1001 			continue;
1002 		}
1003 		LZ4_SECURECOPY(ref, op, cpy);
1004 		op = cpy;	/* correction */
1005 	}
1006 
1007 	/* end of decoding */
1008 	return (int)(((char *)op) - dest);
1009 
1010 	/* write overflow error detected */
1011 	_output_error:
1012 	return (-1);
1013 }
1014 
1015 #ifdef __FreeBSD__
1016 /*
1017  * FreeBSD has 4, 8 and 16 KB malloc zones which can be used here.
1018  * Should struct refTables get resized this may need to be revisited, hence
1019  * compiler-time asserts.
1020  */
1021 _Static_assert(sizeof(struct refTables) <= 16384,
1022     "refTables too big for malloc");
1023 _Static_assert((sizeof(struct refTables) % 4096) == 0,
1024     "refTables not a multiple of page size");
1025 #else
1026 #define ZFS_LZ4_USE_CACHE
1027 #endif
1028 
1029 #ifdef ZFS_LZ4_USE_CACHE
1030 static kmem_cache_t *lz4_cache;
1031 
1032 void
lz4_init(void)1033 lz4_init(void)
1034 {
1035 	lz4_cache = kmem_cache_create("lz4_cache",
1036 	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
1037 }
1038 
1039 void
lz4_fini(void)1040 lz4_fini(void)
1041 {
1042 	if (lz4_cache) {
1043 		kmem_cache_destroy(lz4_cache);
1044 		lz4_cache = NULL;
1045 	}
1046 }
1047 
1048 static void *
lz4_alloc(int flags)1049 lz4_alloc(int flags)
1050 {
1051 	ASSERT(lz4_cache != NULL);
1052 	return (kmem_cache_alloc(lz4_cache, flags));
1053 }
1054 
1055 static void
lz4_free(void * ctx)1056 lz4_free(void *ctx)
1057 {
1058 	kmem_cache_free(lz4_cache, ctx);
1059 }
1060 #else
1061 void
lz4_init(void)1062 lz4_init(void)
1063 {
1064 }
1065 
1066 void
lz4_fini(void)1067 lz4_fini(void)
1068 {
1069 }
1070 
1071 static void *
lz4_alloc(int flags)1072 lz4_alloc(int flags)
1073 {
1074 	return (kmem_alloc(sizeof (struct refTables), flags));
1075 }
1076 
1077 static void
lz4_free(void * ctx)1078 lz4_free(void *ctx)
1079 {
1080 	kmem_free(ctx, sizeof (struct refTables));
1081 }
1082 #endif
1083