1 /*	$OpenBSD: rf_nwayxor.c,v 1.4 2002/12/16 07:01:04 tdeval Exp $	*/
2 /*	$NetBSD: rf_nwayxor.c,v 1.4 2000/03/30 12:45:41 augustss Exp $	*/
3 
4 /*
5  * Copyright (c) 1995 Carnegie-Mellon University.
6  * All rights reserved.
7  *
8  * Author: Mark Holland, Daniel Stodolsky
9  *
10  * Permission to use, copy, modify and distribute this software and
11  * its documentation is hereby granted, provided that both the copyright
12  * notice and this permission notice appear in all copies of the
13  * software, derivative works or modified versions, and any portions
14  * thereof, and that both notices appear in supporting documentation.
15  *
16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
19  *
20  * Carnegie Mellon requests users of this software to return to
21  *
22  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
23  *  School of Computer Science
24  *  Carnegie Mellon University
25  *  Pittsburgh PA 15213-3890
26  *
27  * any improvements or extensions that they make and grant Carnegie the
28  * rights to redistribute these changes.
29  */
30 
31 /*************************************************************
32  *
33  * nwayxor.c -- Code to do N-way xors for reconstruction.
34  *
35  * nWayXorN xors N input buffers into the destination buffer.
36  * Adapted from danner's longword_bxor code.
37  *
38  *************************************************************/
39 
40 #include "rf_nwayxor.h"
41 #include "rf_shutdown.h"
42 
/*
 * Invocation counters for the routines in this file: rf_nWayXorN bumps
 * callcount[N] on every call.  Slot 0 is never incremented here.
 */
static int callcount[10];

/* Shutdown hook defined below; it is registered by rf_ConfigureNWayXor(). */
void rf_ShutdownNWayXor(void *ignored);
45 
46 void
rf_ShutdownNWayXor(void * ignored)47 rf_ShutdownNWayXor(void *ignored)
48 {
49 	int i;
50 
51 	if (rf_showXorCallCounts == 0)
52 		return;
53 	printf("Call counts for n-way xor routines:  ");
54 	for (i = 0; i < 10; i++)
55 		printf("%d ", callcount[i]);
56 	printf("\n");
57 }
58 
59 int
rf_ConfigureNWayXor(RF_ShutdownList_t ** listp)60 rf_ConfigureNWayXor(RF_ShutdownList_t **listp)
61 {
62 	int i, rc;
63 
64 	for (i = 0; i < 10; i++)
65 		callcount[i] = 0;
66 	rc = rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL);
67 	return (rc);
68 }
69 
70 void
rf_nWayXor1(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)71 rf_nWayXor1(
72 	RF_ReconBuffer_t	**src_rbs,
73 	RF_ReconBuffer_t	 *dest_rb,
74 	int			  len
75 )
76 {
77 	unsigned long *src = (unsigned long *) src_rbs[0]->buffer;
78 	unsigned long *dest = (unsigned long *) dest_rb->buffer;
79 	unsigned long *end = src + len;
80 	unsigned long d0, d1, d2, d3, s0, s1, s2, s3;
81 
82 	callcount[1]++;
83 	while (len >= 4) {
84 		d0 = dest[0];
85 		d1 = dest[1];
86 		d2 = dest[2];
87 		d3 = dest[3];
88 		s0 = src[0];
89 		s1 = src[1];
90 		s2 = src[2];
91 		s3 = src[3];
92 		dest[0] = d0 ^ s0;
93 		dest[1] = d1 ^ s1;
94 		dest[2] = d2 ^ s2;
95 		dest[3] = d3 ^ s3;
96 		src += 4;
97 		dest += 4;
98 		len -= 4;
99 	}
100 	while (src < end) {
101 		*dest++ ^= *src++;
102 	}
103 }
104 
105 void
rf_nWayXor2(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)106 rf_nWayXor2(
107 	RF_ReconBuffer_t	**src_rbs,
108 	RF_ReconBuffer_t	 *dest_rb,
109 	int			  len
110 )
111 {
112 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
113 	unsigned long *a = dst;
114 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
115 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
116 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
117 
118 	callcount[2]++;
119 	/* Align dest to cache line. */
120 	while ((((unsigned long) dst) & 0x1f)) {
121 		*dst++ = *a++ ^ *b++ ^ *c++;
122 		len--;
123 	}
124 	while (len > 4) {
125 		a0 = a[0];
126 		len -= 4;
127 
128 		a1 = a[1];
129 		a2 = a[2];
130 
131 		a3 = a[3];
132 		a += 4;
133 
134 		b0 = b[0];
135 		b1 = b[1];
136 
137 		b2 = b[2];
138 		b3 = b[3];
139 		/* Start dual issue. */
140 		a0 ^= b0;
141 		b0 = c[0];
142 
143 		b += 4;
144 		a1 ^= b1;
145 
146 		a2 ^= b2;
147 		a3 ^= b3;
148 
149 		b1 = c[1];
150 		a0 ^= b0;
151 
152 		b2 = c[2];
153 		a1 ^= b1;
154 
155 		b3 = c[3];
156 		a2 ^= b2;
157 
158 		dst[0] = a0;
159 		a3 ^= b3;
160 		dst[1] = a1;
161 		c += 4;
162 		dst[2] = a2;
163 		dst[3] = a3;
164 		dst += 4;
165 	}
166 	while (len) {
167 		*dst++ = *a++ ^ *b++ ^ *c++;
168 		len--;
169 	}
170 }
171 
/*
 * LOAD_FIRST -- begin one four-word step.
 *
 * Loads four words from _dst into a0..a3 and four words from _b into
 * b0..b3, and subtracts 4 from len.  Note that the first arg is not
 * incremented but the 2nd arg is (by four words).
 *
 * Expects a0..a3, b0..b3 and len to be declared in the calling scope.
 * Wrapped in do { } while (0) so it acts as a single statement; write the
 * invocation with a trailing semicolon.
 */
#define	LOAD_FIRST(_dst,_b)						\
do {									\
	a0 = (_dst)[0]; len -= 4;					\
	a1 = (_dst)[1];							\
	a2 = (_dst)[2];							\
	a3 = (_dst)[3];							\
	b0 = (_b)[0];							\
	b1 = (_b)[1];							\
	b2 = (_b)[2];							\
	b3 = (_b)[3];  (_b) += 4;					\
} while (0)
182 
/*
 * XOR_AND_LOAD_NEXT -- fold the words held in b0..b3 into a0..a3, then
 * refill b0..b3 with the next four words from _n.
 *
 * Note: the arg is incremented (by four words).  Expects a0..a3 and
 * b0..b3 in the calling scope.  Wrapped in do { } while (0) so it acts as
 * a single statement; write the invocation with a trailing semicolon.
 */
#define	XOR_AND_LOAD_NEXT(_n)						\
do {									\
	a0 ^= b0; b0 = (_n)[0];						\
	a1 ^= b1; b1 = (_n)[1];						\
	a2 ^= b2; b2 = (_n)[2];						\
	a3 ^= b3; b3 = (_n)[3];						\
	(_n) += 4;							\
} while (0)
190 
/*
 * XOR_AND_STORE -- fold the words held in b0..b3 into a0..a3 and write the
 * four results to _dst.
 *
 * The arg is incremented (by four words).  Expects a0..a3 and b0..b3 in
 * the calling scope.  Wrapped in do { } while (0) so it acts as a single
 * statement; write the invocation with a trailing semicolon.
 */
#define	XOR_AND_STORE(_dst)						\
do {									\
	a0 ^= b0; (_dst)[0] = a0;					\
	a1 ^= b1; (_dst)[1] = a1;					\
	a2 ^= b2; (_dst)[2] = a2;					\
	a3 ^= b3; (_dst)[3] = a3;					\
	(_dst) += 4;							\
} while (0)
198 
199 
200 void
rf_nWayXor3(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)201 rf_nWayXor3(
202 	RF_ReconBuffer_t	**src_rbs,
203 	RF_ReconBuffer_t	 *dest_rb,
204 	int			  len
205 )
206 {
207 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
208 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
209 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
210 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
211 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
212 
213 	callcount[3]++;
214 	/* Align dest to cache line. */
215 	while ((((unsigned long) dst) & 0x1f)) {
216 		*dst++ ^= *b++ ^ *c++ ^ *d++;
217 		len--;
218 	}
219 	while (len > 4) {
220 		LOAD_FIRST(dst, b);
221 		XOR_AND_LOAD_NEXT(c);
222 		XOR_AND_LOAD_NEXT(d);
223 		XOR_AND_STORE(dst);
224 	}
225 	while (len) {
226 		*dst++ ^= *b++ ^ *c++ ^ *d++;
227 		len--;
228 	}
229 }
230 
231 void
rf_nWayXor4(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)232 rf_nWayXor4(
233 	RF_ReconBuffer_t	**src_rbs,
234 	RF_ReconBuffer_t	 *dest_rb,
235 	int			  len
236 )
237 {
238 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
239 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
240 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
241 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
242 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
243 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
244 
245 	callcount[4]++;
246 	/* Align dest to cache line. */
247 	while ((((unsigned long) dst) & 0x1f)) {
248 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
249 		len--;
250 	}
251 	while (len > 4) {
252 		LOAD_FIRST(dst, b);
253 		XOR_AND_LOAD_NEXT(c);
254 		XOR_AND_LOAD_NEXT(d);
255 		XOR_AND_LOAD_NEXT(e);
256 		XOR_AND_STORE(dst);
257 	}
258 	while (len) {
259 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++;
260 		len--;
261 	}
262 }
263 
264 void
rf_nWayXor5(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)265 rf_nWayXor5(
266 	RF_ReconBuffer_t	**src_rbs,
267 	RF_ReconBuffer_t	 *dest_rb,
268 	int			  len
269 )
270 {
271 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
272 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
273 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
274 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
275 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
276 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
277 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
278 
279 	callcount[5]++;
280 	/* Align dest to cache line. */
281 	while ((((unsigned long) dst) & 0x1f)) {
282 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
283 		len--;
284 	}
285 	while (len > 4) {
286 		LOAD_FIRST(dst, b);
287 		XOR_AND_LOAD_NEXT(c);
288 		XOR_AND_LOAD_NEXT(d);
289 		XOR_AND_LOAD_NEXT(e);
290 		XOR_AND_LOAD_NEXT(f);
291 		XOR_AND_STORE(dst);
292 	}
293 	while (len) {
294 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++;
295 		len--;
296 	}
297 }
298 
299 void
rf_nWayXor6(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)300 rf_nWayXor6(
301 	RF_ReconBuffer_t	**src_rbs,
302 	RF_ReconBuffer_t	 *dest_rb,
303 	int			  len
304 )
305 {
306 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
307 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
308 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
309 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
310 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
311 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
312 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
313 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
314 
315 	callcount[6]++;
316 	/* Align dest to cache line. */
317 	while ((((unsigned long) dst) & 0x1f)) {
318 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
319 		len--;
320 	}
321 	while (len > 4) {
322 		LOAD_FIRST(dst, b);
323 		XOR_AND_LOAD_NEXT(c);
324 		XOR_AND_LOAD_NEXT(d);
325 		XOR_AND_LOAD_NEXT(e);
326 		XOR_AND_LOAD_NEXT(f);
327 		XOR_AND_LOAD_NEXT(g);
328 		XOR_AND_STORE(dst);
329 	}
330 	while (len) {
331 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++;
332 		len--;
333 	}
334 }
335 
336 void
rf_nWayXor7(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)337 rf_nWayXor7(
338 	RF_ReconBuffer_t	**src_rbs,
339 	RF_ReconBuffer_t	 *dest_rb,
340 	int			  len
341 )
342 {
343 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
344 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
345 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
346 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
347 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
348 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
349 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
350 	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
351 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
352 
353 	callcount[7]++;
354 	/* Align dest to cache line. */
355 	while ((((unsigned long) dst) & 0x1f)) {
356 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
357 		len--;
358 	}
359 	while (len > 4) {
360 		LOAD_FIRST(dst, b);
361 		XOR_AND_LOAD_NEXT(c);
362 		XOR_AND_LOAD_NEXT(d);
363 		XOR_AND_LOAD_NEXT(e);
364 		XOR_AND_LOAD_NEXT(f);
365 		XOR_AND_LOAD_NEXT(g);
366 		XOR_AND_LOAD_NEXT(h);
367 		XOR_AND_STORE(dst);
368 	}
369 	while (len) {
370 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++;
371 		len--;
372 	}
373 }
374 
375 void
rf_nWayXor8(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)376 rf_nWayXor8(
377 	RF_ReconBuffer_t	**src_rbs,
378 	RF_ReconBuffer_t	 *dest_rb,
379 	int			  len
380 )
381 {
382 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
383 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
384 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
385 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
386 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
387 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
388 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
389 	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
390 	unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
391 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
392 
393 	callcount[8]++;
394 	/* Align dest to cache line. */
395 	while ((((unsigned long) dst) & 0x1f)) {
396 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
397 		len--;
398 	}
399 	while (len > 4) {
400 		LOAD_FIRST(dst, b);
401 		XOR_AND_LOAD_NEXT(c);
402 		XOR_AND_LOAD_NEXT(d);
403 		XOR_AND_LOAD_NEXT(e);
404 		XOR_AND_LOAD_NEXT(f);
405 		XOR_AND_LOAD_NEXT(g);
406 		XOR_AND_LOAD_NEXT(h);
407 		XOR_AND_LOAD_NEXT(i);
408 		XOR_AND_STORE(dst);
409 	}
410 	while (len) {
411 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++;
412 		len--;
413 	}
414 }
415 
416 
417 void
rf_nWayXor9(RF_ReconBuffer_t ** src_rbs,RF_ReconBuffer_t * dest_rb,int len)418 rf_nWayXor9(
419 	RF_ReconBuffer_t	**src_rbs,
420 	RF_ReconBuffer_t	 *dest_rb,
421 	int			  len
422 )
423 {
424 	unsigned long *dst = (unsigned long *) dest_rb->buffer;
425 	unsigned long *b = (unsigned long *) src_rbs[0]->buffer;
426 	unsigned long *c = (unsigned long *) src_rbs[1]->buffer;
427 	unsigned long *d = (unsigned long *) src_rbs[2]->buffer;
428 	unsigned long *e = (unsigned long *) src_rbs[3]->buffer;
429 	unsigned long *f = (unsigned long *) src_rbs[4]->buffer;
430 	unsigned long *g = (unsigned long *) src_rbs[5]->buffer;
431 	unsigned long *h = (unsigned long *) src_rbs[6]->buffer;
432 	unsigned long *i = (unsigned long *) src_rbs[7]->buffer;
433 	unsigned long *j = (unsigned long *) src_rbs[8]->buffer;
434 	unsigned long a0, a1, a2, a3, b0, b1, b2, b3;
435 
436 	callcount[9]++;
437 	/* Align dest to cache line. */
438 	while ((((unsigned long) dst) & 0x1f)) {
439 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^
440 		    *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
441 		len--;
442 	}
443 	while (len > 4) {
444 		LOAD_FIRST(dst, b);
445 		XOR_AND_LOAD_NEXT(c);
446 		XOR_AND_LOAD_NEXT(d);
447 		XOR_AND_LOAD_NEXT(e);
448 		XOR_AND_LOAD_NEXT(f);
449 		XOR_AND_LOAD_NEXT(g);
450 		XOR_AND_LOAD_NEXT(h);
451 		XOR_AND_LOAD_NEXT(i);
452 		XOR_AND_LOAD_NEXT(j);
453 		XOR_AND_STORE(dst);
454 	}
455 	while (len) {
456 		*dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^
457 		    *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++;
458 		len--;
459 	}
460 }
461