xref: /NextBSD/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c (revision 5e568154a01fb6be74908baed265f265a56f002f)
1 /*
2  * LZ4 - Fast LZ compression algorithm
3  * Header File
4  * Copyright (C) 2011-2013, Yann Collet.
5  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above
14  * copyright notice, this list of conditions and the following disclaimer
15  * in the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * You can contact the author at :
31  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32  * - LZ4 source repository : http://code.google.com/p/lz4/
33  */
34 
35 #include <sys/zfs_context.h>
36 
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38     int osize);
39 static int LZ4_compressBound(int isize);
40 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
41     int isize, int maxOutputSize);
42 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
43     int isize, int osize);
44 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
45     int isize, int osize);
46 
47 static kmem_cache_t *lz4_ctx_cache;
48 
49 /*ARGSUSED*/
50 size_t
lz4_compress(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)51 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
52 {
53 	uint32_t bufsiz;
54 	char *dest = d_start;
55 
56 	ASSERT(d_len >= sizeof (bufsiz));
57 
58 	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
59 	    d_len - sizeof (bufsiz));
60 
61 	/* Signal an error if the compression routine returned zero. */
62 	if (bufsiz == 0)
63 		return (s_len);
64 
65 	/*
66 	 * Encode the compresed buffer size at the start. We'll need this in
67 	 * decompression to counter the effects of padding which might be
68 	 * added to the compressed buffer and which, if unhandled, would
69 	 * confuse the hell out of our decompression function.
70 	 */
71 	*(uint32_t *)dest = BE_32(bufsiz);
72 
73 	return (bufsiz + sizeof (bufsiz));
74 }
75 
76 /*ARGSUSED*/
77 int
lz4_decompress(void * s_start,void * d_start,size_t s_len,size_t d_len,int n)78 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
79 {
80 	const char *src = s_start;
81 	uint32_t bufsiz = BE_IN32(src);
82 
83 	/* invalid compressed buffer size encoded at start */
84 	if (bufsiz + sizeof (bufsiz) > s_len)
85 		return (1);
86 
87 	/*
88 	 * Returns 0 on success (decompression function returned non-negative)
89 	 * and non-zero on failure (decompression function returned negative.
90 	 */
91 	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
92 	    d_start, bufsiz, d_len) < 0);
93 }
94 
95 /*
96  * LZ4 API Description:
97  *
98  * Simple Functions:
99  * real_LZ4_compress() :
100  * 	isize  : is the input size. Max supported value is ~1.9GB
101  * 	return : the number of bytes written in buffer dest
102  *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
103  * 	note : destination buffer must be already allocated.
104  * 		destination buffer must be sized to handle worst cases
105  * 		situations (input data not compressible) worst case size
106  * 		evaluation is provided by function LZ4_compressBound().
107  *
108  * Advanced Functions
109  *
110  * LZ4_compressBound() :
111  * 	Provides the maximum size that LZ4 may output in a "worst case"
112  * 	scenario (input data not compressible) primarily useful for memory
113  * 	allocation of output buffer.
114  *
115  * 	isize  : is the input size. Max supported value is ~1.9GB
116  * 	return : maximum output size in a "worst case" scenario
117  * 	note : this function is limited by "int" range (2^31-1)
118  *
119  * LZ4_uncompress_unknownOutputSize() :
120  * 	isize  : is the input size, therefore the compressed size
121  * 	maxOutputSize : is the size of the destination buffer (which must be
122  * 		already allocated)
123  * 	return : the number of bytes decoded in the destination buffer
124  * 		(necessarily <= maxOutputSize). If the source stream is
125  * 		malformed, the function will stop decoding and return a
126  * 		negative result, indicating the byte position of the faulty
127  * 		instruction. This function never writes beyond dest +
128  * 		maxOutputSize, and is therefore protected against malicious
129  * 		data packets.
130  * 	note   : Destination buffer must be already allocated.
131  *
132  * LZ4_compressCtx() :
133  * 	This function explicitly handles the CTX memory structure.
134  *
135  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
136  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
137  * 	isn't valid.
138  *
139  * LZ4_compress64kCtx() :
140  * 	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
141  * 	isize *Must* be <64KB, otherwise the output will be corrupted.
142  *
143  * 	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
144  * 	by the caller (either on the stack or using kmem_zalloc). Passing NULL
145  * 	isn't valid.
146  */
147 
148 /*
149  * Tuning parameters
150  */
151 
152 /*
153  * COMPRESSIONLEVEL: Increasing this value improves compression ratio
154  *	 Lowering this value reduces memory usage. Reduced memory usage
155  *	typically improves speed, due to cache effect (ex: L1 32KB for Intel,
156  *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
157  *	(examples : 12 -> 16KB ; 17 -> 512KB)
158  */
159 #define	COMPRESSIONLEVEL 12
160 
161 /*
162  * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
163  *	algorithm skip faster data segments considered "incompressible".
164  *	This may decrease compression ratio dramatically, but will be
165  *	faster on incompressible data. Increasing this value will make
166  *	the algorithm search more before declaring a segment "incompressible".
167  *	This could improve compression a bit, but will be slower on
168  *	incompressible data. The default value (6) is recommended.
169  */
170 #define	NOTCOMPRESSIBLE_CONFIRMATION 6
171 
172 /*
173  * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
174  * performance for big endian cpu, but the resulting compressed stream
175  * will be incompatible with little-endian CPU. You can set this option
176  * to 1 in situations where data will stay within closed environment.
177  * This option is useless on Little_Endian CPU (such as x86).
178  */
179 /* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
180 
181 /*
182  * CPU Feature Detection
183  */
184 
185 /* 32 or 64 bits ? */
186 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
187     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
188     defined(__LP64__) || defined(_LP64))
189 #define	LZ4_ARCH64 1
190 /*
191  * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
192  * can spend 16k on the algorithm
193  */
194 /* FreeBSD: Use heap for all platforms for now */
195 #define	STACKLIMIT 0
196 #else
197 #define	LZ4_ARCH64 0
198 /*
199  * Illumos: On i386 we only have 12k of stack, so in order to maintain the
200  * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
201  * suck, but alas, it's ZFS on 32-bit we're talking about, so...
202  */
203 #define	STACKLIMIT 0
204 #endif
205 
206 /*
207  * Little Endian or Big Endian?
208  * Note: overwrite the below #define if you know your architecture endianess.
209  */
210 #if BYTE_ORDER == BIG_ENDIAN
211 #define	LZ4_BIG_ENDIAN 1
212 #else
213 /*
214  * Little Endian assumed. PDP Endian and other very rare endian format
215  * are unsupported.
216  */
217 #endif
218 
219 /*
220  * Unaligned memory access is automatically enabled for "common" CPU,
221  * such as x86. For others CPU, the compiler will be more cautious, and
222  * insert extra code to ensure aligned access is respected. If you know
223  * your target CPU supports unaligned memory access, you may want to
224  * force this option manually to improve performance
225  */
226 #if defined(__ARM_FEATURE_UNALIGNED)
227 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
228 #endif
229 
230 /*
231  * FreeBSD: can't use GCC's __builtin_ctz when using sparc64 because
232  * gcc currently rely on libcompiler_rt.
233  *
234  * TODO: revisit this when situation changes.
235  */
236 #if defined(__sparc64__)
237 #define	LZ4_FORCE_SW_BITCOUNT
238 #endif
239 
240 /*
241  * Compiler Options
242  */
243 #if __STDC_VERSION__ >= 199901L	/* C99 */
244 /* "restrict" is a known keyword */
245 #else
246 /* Disable restrict */
247 #define	restrict
248 #endif
249 
250 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
251 	(((x) & 0xffu) << 8)))
252 
253 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
254 
255 #if defined(likely)
256 #undef likely
257 #endif
258 #if defined(unlikely)
259 #undef unlikely
260 #endif
261 
262 #define	likely(expr)	expect((expr) != 0, 1)
263 #define	unlikely(expr)	expect((expr) != 0, 0)
264 
265 /* Basic types */
266 #define	BYTE	uint8_t
267 #define	U16	uint16_t
268 #define	U32	uint32_t
269 #define	S32	int32_t
270 #define	U64	uint64_t
271 
272 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
273 #pragma pack(1)
274 #endif
275 
276 typedef struct _U16_S {
277 	U16 v;
278 } U16_S;
279 typedef struct _U32_S {
280 	U32 v;
281 } U32_S;
282 typedef struct _U64_S {
283 	U64 v;
284 } U64_S;
285 
286 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
287 #pragma pack()
288 #endif
289 
290 #define	A64(x) (((U64_S *)(x))->v)
291 #define	A32(x) (((U32_S *)(x))->v)
292 #define	A16(x) (((U16_S *)(x))->v)
293 
294 /*
295  * Constants
296  */
297 #define	MINMATCH 4
298 
299 #define	HASH_LOG COMPRESSIONLEVEL
300 #define	HASHTABLESIZE (1 << HASH_LOG)
301 #define	HASH_MASK (HASHTABLESIZE - 1)
302 
303 #define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
304 	NOTCOMPRESSIBLE_CONFIRMATION : 2)
305 
306 /*
307  * Defines if memory is allocated into the stack (local variable),
308  * or into the heap (kmem_alloc()).
309  */
310 #define	HEAPMODE (HASH_LOG > STACKLIMIT)
311 #define	COPYLENGTH 8
312 #define	LASTLITERALS 5
313 #define	MFLIMIT (COPYLENGTH + MINMATCH)
314 #define	MINLENGTH (MFLIMIT + 1)
315 
316 #define	MAXD_LOG 16
317 #define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)
318 
319 #define	ML_BITS 4
320 #define	ML_MASK ((1U<<ML_BITS)-1)
321 #define	RUN_BITS (8-ML_BITS)
322 #define	RUN_MASK ((1U<<RUN_BITS)-1)
323 
324 
325 /*
326  * Architecture-specific macros
327  */
328 #if LZ4_ARCH64
329 #define	STEPSIZE 8
330 #define	UARCH U64
331 #define	AARCH A64
332 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
333 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
334 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
335 #define	HTYPE U32
336 #define	INITBASE(base)		const BYTE* const base = ip
337 #else /* !LZ4_ARCH64 */
338 #define	STEPSIZE 4
339 #define	UARCH U32
340 #define	AARCH A32
341 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
342 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
343 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
344 #define	HTYPE const BYTE *
345 #define	INITBASE(base)		const int base = 0
346 #endif /* !LZ4_ARCH64 */
347 
348 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
349 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
350 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
351 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
352 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
353 #else
354 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
355 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
356 #endif
357 
358 
359 /* Local structures */
360 struct refTables {
361 	HTYPE hashTable[HASHTABLESIZE];
362 };
363 
364 
365 /* Macros */
366 #define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
367 	HASH_LOG))
368 #define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
369 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
370 #define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
371 	d = e; }
372 
373 
374 /* Private functions */
375 #if LZ4_ARCH64
376 
377 static inline int
LZ4_NbCommonBytes(register U64 val)378 LZ4_NbCommonBytes(register U64 val)
379 {
380 #if defined(LZ4_BIG_ENDIAN)
381 #if !defined(LZ4_FORCE_SW_BITCOUNT)
382 	return (__builtin_clzll(val) >> 3);
383 #else
384 	int r;
385 	if (!(val >> 32)) {
386 		r = 4;
387 	} else {
388 		r = 0;
389 		val >>= 32;
390 	}
391 	if (!(val >> 16)) {
392 		r += 2;
393 		val >>= 8;
394 	} else {
395 		val >>= 24;
396 	}
397 	r += (!val);
398 	return (r);
399 #endif
400 #else
401 #if !defined(LZ4_FORCE_SW_BITCOUNT)
402 	return (__builtin_ctzll(val) >> 3);
403 #else
404 	static const int DeBruijnBytePos[64] =
405 	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
406 		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
407 		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
408 		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
409 	};
410 	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
411 	    58];
412 #endif
413 #endif
414 }
415 
416 #else
417 
418 static inline int
LZ4_NbCommonBytes(register U32 val)419 LZ4_NbCommonBytes(register U32 val)
420 {
421 #if defined(LZ4_BIG_ENDIAN)
422 #if !defined(LZ4_FORCE_SW_BITCOUNT)
423 	return (__builtin_clz(val) >> 3);
424 #else
425 	int r;
426 	if (!(val >> 16)) {
427 		r = 2;
428 		val >>= 8;
429 	} else {
430 		r = 0;
431 		val >>= 24;
432 	}
433 	r += (!val);
434 	return (r);
435 #endif
436 #else
437 #if !defined(LZ4_FORCE_SW_BITCOUNT)
438 	return (__builtin_ctz(val) >> 3);
439 #else
440 	static const int DeBruijnBytePos[32] = {
441 		0, 0, 3, 0, 3, 1, 3, 0,
442 		3, 2, 2, 1, 3, 2, 0, 1,
443 		3, 3, 1, 2, 2, 2, 2, 0,
444 		3, 1, 2, 0, 1, 0, 1, 1
445 	};
446 	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
447 	    27];
448 #endif
449 #endif
450 }
451 
452 #endif
453 
454 /* Public functions */
455 
456 static int
LZ4_compressBound(int isize)457 LZ4_compressBound(int isize)
458 {
459 	return (isize + (isize / 255) + 16);
460 }
461 
462 /* Compression functions */
463 
464 /*ARGSUSED*/
465 static int
LZ4_compressCtx(void * ctx,const char * source,char * dest,int isize,int osize)466 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
467     int osize)
468 {
469 #if HEAPMODE
470 	struct refTables *srt = (struct refTables *)ctx;
471 	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
472 #else
473 	HTYPE HashTable[HASHTABLESIZE] = { 0 };
474 #endif
475 
476 	const BYTE *ip = (BYTE *) source;
477 	INITBASE(base);
478 	const BYTE *anchor = ip;
479 	const BYTE *const iend = ip + isize;
480 	const BYTE *const oend = (BYTE *) dest + osize;
481 	const BYTE *const mflimit = iend - MFLIMIT;
482 #define	matchlimit (iend - LASTLITERALS)
483 
484 	BYTE *op = (BYTE *) dest;
485 
486 	int len, length;
487 	const int skipStrength = SKIPSTRENGTH;
488 	U32 forwardH;
489 
490 
491 	/* Init */
492 	if (isize < MINLENGTH)
493 		goto _last_literals;
494 
495 	/* First Byte */
496 	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
497 	ip++;
498 	forwardH = LZ4_HASH_VALUE(ip);
499 
500 	/* Main Loop */
501 	for (;;) {
502 		int findMatchAttempts = (1U << skipStrength) + 3;
503 		const BYTE *forwardIp = ip;
504 		const BYTE *ref;
505 		BYTE *token;
506 
507 		/* Find a match */
508 		do {
509 			U32 h = forwardH;
510 			int step = findMatchAttempts++ >> skipStrength;
511 			ip = forwardIp;
512 			forwardIp = ip + step;
513 
514 			if unlikely(forwardIp > mflimit) {
515 				goto _last_literals;
516 			}
517 
518 			forwardH = LZ4_HASH_VALUE(forwardIp);
519 			ref = base + HashTable[h];
520 			HashTable[h] = ip - base;
521 
522 		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
523 
524 		/* Catch up */
525 		while ((ip > anchor) && (ref > (BYTE *) source) &&
526 		    unlikely(ip[-1] == ref[-1])) {
527 			ip--;
528 			ref--;
529 		}
530 
531 		/* Encode Literal length */
532 		length = ip - anchor;
533 		token = op++;
534 
535 		/* Check output limit */
536 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
537 		    (length >> 8) > oend)
538 			return (0);
539 
540 		if (length >= (int)RUN_MASK) {
541 			*token = (RUN_MASK << ML_BITS);
542 			len = length - RUN_MASK;
543 			for (; len > 254; len -= 255)
544 				*op++ = 255;
545 			*op++ = (BYTE)len;
546 		} else
547 			*token = (length << ML_BITS);
548 
549 		/* Copy Literals */
550 		LZ4_BLINDCOPY(anchor, op, length);
551 
552 		_next_match:
553 		/* Encode Offset */
554 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
555 
556 		/* Start Counting */
557 		ip += MINMATCH;
558 		ref += MINMATCH;	/* MinMatch verified */
559 		anchor = ip;
560 		while likely(ip < matchlimit - (STEPSIZE - 1)) {
561 			UARCH diff = AARCH(ref) ^ AARCH(ip);
562 			if (!diff) {
563 				ip += STEPSIZE;
564 				ref += STEPSIZE;
565 				continue;
566 			}
567 			ip += LZ4_NbCommonBytes(diff);
568 			goto _endCount;
569 		}
570 #if LZ4_ARCH64
571 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
572 			ip += 4;
573 			ref += 4;
574 		}
575 #endif
576 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
577 			ip += 2;
578 			ref += 2;
579 		}
580 		if ((ip < matchlimit) && (*ref == *ip))
581 			ip++;
582 		_endCount:
583 
584 		/* Encode MatchLength */
585 		len = (ip - anchor);
586 		/* Check output limit */
587 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
588 			return (0);
589 		if (len >= (int)ML_MASK) {
590 			*token += ML_MASK;
591 			len -= ML_MASK;
592 			for (; len > 509; len -= 510) {
593 				*op++ = 255;
594 				*op++ = 255;
595 			}
596 			if (len > 254) {
597 				len -= 255;
598 				*op++ = 255;
599 			}
600 			*op++ = (BYTE)len;
601 		} else
602 			*token += len;
603 
604 		/* Test end of chunk */
605 		if (ip > mflimit) {
606 			anchor = ip;
607 			break;
608 		}
609 		/* Fill table */
610 		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
611 
612 		/* Test next position */
613 		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
614 		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
615 		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
616 			token = op++;
617 			*token = 0;
618 			goto _next_match;
619 		}
620 		/* Prepare next loop */
621 		anchor = ip++;
622 		forwardH = LZ4_HASH_VALUE(ip);
623 	}
624 
625 	_last_literals:
626 	/* Encode Last Literals */
627 	{
628 		int lastRun = iend - anchor;
629 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
630 		    oend)
631 			return (0);
632 		if (lastRun >= (int)RUN_MASK) {
633 			*op++ = (RUN_MASK << ML_BITS);
634 			lastRun -= RUN_MASK;
635 			for (; lastRun > 254; lastRun -= 255) {
636 				*op++ = 255;
637 			}
638 			*op++ = (BYTE)lastRun;
639 		} else
640 			*op++ = (lastRun << ML_BITS);
641 		(void) memcpy(op, anchor, iend - anchor);
642 		op += iend - anchor;
643 	}
644 
645 	/* End */
646 	return (int)(((char *)op) - dest);
647 }
648 
649 
650 
651 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
652 #define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
653 #define	HASHLOG64K (HASH_LOG + 1)
654 #define	HASH64KTABLESIZE (1U << HASHLOG64K)
655 #define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
656 	HASHLOG64K))
657 #define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
658 
659 /*ARGSUSED*/
660 static int
LZ4_compress64kCtx(void * ctx,const char * source,char * dest,int isize,int osize)661 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
662     int osize)
663 {
664 #if HEAPMODE
665 	struct refTables *srt = (struct refTables *)ctx;
666 	U16 *HashTable = (U16 *) (srt->hashTable);
667 #else
668 	U16 HashTable[HASH64KTABLESIZE] = { 0 };
669 #endif
670 
671 	const BYTE *ip = (BYTE *) source;
672 	const BYTE *anchor = ip;
673 	const BYTE *const base = ip;
674 	const BYTE *const iend = ip + isize;
675 	const BYTE *const oend = (BYTE *) dest + osize;
676 	const BYTE *const mflimit = iend - MFLIMIT;
677 #define	matchlimit (iend - LASTLITERALS)
678 
679 	BYTE *op = (BYTE *) dest;
680 
681 	int len, length;
682 	const int skipStrength = SKIPSTRENGTH;
683 	U32 forwardH;
684 
685 	/* Init */
686 	if (isize < MINLENGTH)
687 		goto _last_literals;
688 
689 	/* First Byte */
690 	ip++;
691 	forwardH = LZ4_HASH64K_VALUE(ip);
692 
693 	/* Main Loop */
694 	for (;;) {
695 		int findMatchAttempts = (1U << skipStrength) + 3;
696 		const BYTE *forwardIp = ip;
697 		const BYTE *ref;
698 		BYTE *token;
699 
700 		/* Find a match */
701 		do {
702 			U32 h = forwardH;
703 			int step = findMatchAttempts++ >> skipStrength;
704 			ip = forwardIp;
705 			forwardIp = ip + step;
706 
707 			if (forwardIp > mflimit) {
708 				goto _last_literals;
709 			}
710 
711 			forwardH = LZ4_HASH64K_VALUE(forwardIp);
712 			ref = base + HashTable[h];
713 			HashTable[h] = ip - base;
714 
715 		} while (A32(ref) != A32(ip));
716 
717 		/* Catch up */
718 		while ((ip > anchor) && (ref > (BYTE *) source) &&
719 		    (ip[-1] == ref[-1])) {
720 			ip--;
721 			ref--;
722 		}
723 
724 		/* Encode Literal length */
725 		length = ip - anchor;
726 		token = op++;
727 
728 		/* Check output limit */
729 		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
730 		    (length >> 8) > oend)
731 			return (0);
732 
733 		if (length >= (int)RUN_MASK) {
734 			*token = (RUN_MASK << ML_BITS);
735 			len = length - RUN_MASK;
736 			for (; len > 254; len -= 255)
737 				*op++ = 255;
738 			*op++ = (BYTE)len;
739 		} else
740 			*token = (length << ML_BITS);
741 
742 		/* Copy Literals */
743 		LZ4_BLINDCOPY(anchor, op, length);
744 
745 		_next_match:
746 		/* Encode Offset */
747 		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
748 
749 		/* Start Counting */
750 		ip += MINMATCH;
751 		ref += MINMATCH;	/* MinMatch verified */
752 		anchor = ip;
753 		while (ip < matchlimit - (STEPSIZE - 1)) {
754 			UARCH diff = AARCH(ref) ^ AARCH(ip);
755 			if (!diff) {
756 				ip += STEPSIZE;
757 				ref += STEPSIZE;
758 				continue;
759 			}
760 			ip += LZ4_NbCommonBytes(diff);
761 			goto _endCount;
762 		}
763 #if LZ4_ARCH64
764 		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
765 			ip += 4;
766 			ref += 4;
767 		}
768 #endif
769 		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
770 			ip += 2;
771 			ref += 2;
772 		}
773 		if ((ip < matchlimit) && (*ref == *ip))
774 			ip++;
775 		_endCount:
776 
777 		/* Encode MatchLength */
778 		len = (ip - anchor);
779 		/* Check output limit */
780 		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
781 			return (0);
782 		if (len >= (int)ML_MASK) {
783 			*token += ML_MASK;
784 			len -= ML_MASK;
785 			for (; len > 509; len -= 510) {
786 				*op++ = 255;
787 				*op++ = 255;
788 			}
789 			if (len > 254) {
790 				len -= 255;
791 				*op++ = 255;
792 			}
793 			*op++ = (BYTE)len;
794 		} else
795 			*token += len;
796 
797 		/* Test end of chunk */
798 		if (ip > mflimit) {
799 			anchor = ip;
800 			break;
801 		}
802 		/* Fill table */
803 		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;
804 
805 		/* Test next position */
806 		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
807 		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
808 		if (A32(ref) == A32(ip)) {
809 			token = op++;
810 			*token = 0;
811 			goto _next_match;
812 		}
813 		/* Prepare next loop */
814 		anchor = ip++;
815 		forwardH = LZ4_HASH64K_VALUE(ip);
816 	}
817 
818 	_last_literals:
819 	/* Encode Last Literals */
820 	{
821 		int lastRun = iend - anchor;
822 		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
823 		    oend)
824 			return (0);
825 		if (lastRun >= (int)RUN_MASK) {
826 			*op++ = (RUN_MASK << ML_BITS);
827 			lastRun -= RUN_MASK;
828 			for (; lastRun > 254; lastRun -= 255)
829 				*op++ = 255;
830 			*op++ = (BYTE)lastRun;
831 		} else
832 			*op++ = (lastRun << ML_BITS);
833 		(void) memcpy(op, anchor, iend - anchor);
834 		op += iend - anchor;
835 	}
836 
837 	/* End */
838 	return (int)(((char *)op) - dest);
839 }
840 
841 static int
real_LZ4_compress(const char * source,char * dest,int isize,int osize)842 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
843 {
844 #if HEAPMODE
845 	void *ctx = kmem_cache_alloc(lz4_ctx_cache, KM_NOSLEEP);
846 	int result;
847 
848 	/*
849 	 * out of kernel memory, gently fall through - this will disable
850 	 * compression in zio_compress_data
851 	 */
852 	if (ctx == NULL)
853 		return (0);
854 
855 	bzero(ctx, sizeof(struct refTables));
856 	if (isize < LZ4_64KLIMIT)
857 		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
858 	else
859 		result = LZ4_compressCtx(ctx, source, dest, isize, osize);
860 
861 	kmem_cache_free(lz4_ctx_cache, ctx);
862 	return (result);
863 #else
864 	if (isize < (int)LZ4_64KLIMIT)
865 		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
866 	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
867 #endif
868 }
869 
870 /* Decompression functions */
871 
872 /*
873  * Note: The decoding functionLZ4_uncompress_unknownOutputSize() is safe
874  *	against "buffer overflow" attack type. They will never write nor
875  *	read outside of the provided output buffers.
876  *	LZ4_uncompress_unknownOutputSize() also insures that it will never
877  *	read outside of the input buffer.  A corrupted input will produce
878  *	an error result, a negative int, indicating the position of the
879  *	error within input stream.
880  */
881 
882 static int
LZ4_uncompress_unknownOutputSize(const char * source,char * dest,int isize,int maxOutputSize)883 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
884     int maxOutputSize)
885 {
886 	/* Local Variables */
887 	const BYTE *restrict ip = (const BYTE *) source;
888 	const BYTE *const iend = ip + isize;
889 	const BYTE *ref;
890 
891 	BYTE *op = (BYTE *) dest;
892 	BYTE *const oend = op + maxOutputSize;
893 	BYTE *cpy;
894 
895 	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
896 #if LZ4_ARCH64
897 	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
898 #endif
899 
900 	/* Main Loop */
901 	while (ip < iend) {
902 		unsigned token;
903 		size_t length;
904 
905 		/* get runlength */
906 		token = *ip++;
907 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
908 			int s = 255;
909 			while ((ip < iend) && (s == 255)) {
910 				s = *ip++;
911 				length += s;
912 			}
913 		}
914 		/* copy literals */
915 		cpy = op + length;
916 		if ((cpy > oend - COPYLENGTH) ||
917 		    (ip + length > iend - COPYLENGTH)) {
918 			if (cpy > oend)
919 				/* Error: writes beyond output buffer */
920 				goto _output_error;
921 			if (ip + length != iend)
922 				/*
923 				 * Error: LZ4 format requires to consume all
924 				 * input at this stage
925 				 */
926 				goto _output_error;
927 			(void) memcpy(op, ip, length);
928 			op += length;
929 			/* Necessarily EOF, due to parsing restrictions */
930 			break;
931 		}
932 		LZ4_WILDCOPY(ip, op, cpy);
933 		ip -= (op - cpy);
934 		op = cpy;
935 
936 		/* get offset */
937 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
938 		ip += 2;
939 		if (ref < (BYTE * const) dest)
940 			/*
941 			 * Error: offset creates reference outside of
942 			 * destination buffer
943 			 */
944 			goto _output_error;
945 
946 		/* get matchlength */
947 		if ((length = (token & ML_MASK)) == ML_MASK) {
948 			while (ip < iend) {
949 				int s = *ip++;
950 				length += s;
951 				if (s == 255)
952 					continue;
953 				break;
954 			}
955 		}
956 		/* copy repeated sequence */
957 		if unlikely(op - ref < STEPSIZE) {
958 #if LZ4_ARCH64
959 			size_t dec64 = dec64table[op-ref];
960 #else
961 			const int dec64 = 0;
962 #endif
963 			op[0] = ref[0];
964 			op[1] = ref[1];
965 			op[2] = ref[2];
966 			op[3] = ref[3];
967 			op += 4;
968 			ref += 4;
969 			ref -= dec32table[op-ref];
970 			A32(op) = A32(ref);
971 			op += STEPSIZE - 4;
972 			ref -= dec64;
973 		} else {
974 			LZ4_COPYSTEP(ref, op);
975 		}
976 		cpy = op + length - (STEPSIZE - 4);
977 		if (cpy > oend - COPYLENGTH) {
978 			if (cpy > oend)
979 				/*
980 				 * Error: request to write outside of
981 				 * destination buffer
982 				 */
983 				goto _output_error;
984 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
985 			while (op < cpy)
986 				*op++ = *ref++;
987 			op = cpy;
988 			if (op == oend)
989 				/*
990 				 * Check EOF (should never happen, since
991 				 * last 5 bytes are supposed to be literals)
992 				 */
993 				goto _output_error;
994 			continue;
995 		}
996 		LZ4_SECURECOPY(ref, op, cpy);
997 		op = cpy;	/* correction */
998 	}
999 
1000 	/* end of decoding */
1001 	return (int)(((char *)op) - dest);
1002 
1003 	/* write overflow error detected */
1004 	_output_error:
1005 	return (int)(-(((char *)ip) - source));
1006 }
1007 
1008 extern void
lz4_init(void)1009 lz4_init(void)
1010 {
1011 
1012 #if HEAPMODE
1013 	lz4_ctx_cache = kmem_cache_create("lz4_ctx", sizeof(struct refTables),
1014 	    0, NULL, NULL, NULL, NULL, NULL, 0);
1015 #endif
1016 }
1017 
1018 extern void
lz4_fini(void)1019 lz4_fini(void)
1020 {
1021 
1022 #if HEAPMODE
1023 	kmem_cache_destroy(lz4_ctx_cache);
1024 #endif
1025 }
1026