1/*        $NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $ */
2
3/*-
4 * Copyright (C) 2001         Martin J. Laubach <mjl@NetBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29/*----------------------------------------------------------------------*/
30
31#include <machine/asm.h>
32
33
34#if defined(LIBC_SCCS) && !defined(lint)
35__RCSID("$NetBSD: bzero.S,v 1.15 2016/12/19 14:30:23 christos Exp $")
36#endif /* LIBC_SCCS && !lint */
37
38#include "assym.h"
39
40#define USE_STSWX 0 /* don't. slower than trivial copy loop */
41
42/*----------------------------------------------------------------------*/
43/*
44     void bzero(void *b %r3, size_t len %r4);
45     void * memset(void *b %r3, int c %r4, size_t len %r5);
46*/
47/*----------------------------------------------------------------------*/
48
/*
 * Register aliases shared by bzero and memset.  r_dst and r_len match the
 * argument registers of bzero(b, len); memset copies its length from %r5
 * into r_len before joining the common code.
 */
#define r_dst       %r3       /* running destination pointer */
#define r_len       %r4       /* number of bytes still to fill */
#define r_val       %r0       /* fill value (0 for bzero) */
52
53                    .text
54                    .align 4
/*
 * void bzero(void *b, size_t len);
 *
 * In:   r_dst (%r3) = b, r_len (%r4) = len
 * Out:  nothing
 *
 * Returns straight away for len == 0.  Otherwise loads r_val with zero and
 * continues at cb_memset inside memset, which does the actual filling.
 * Overwrites r_val and %cr1 here; everything else is up to the shared code.
 */
ENTRY(bzero)
                    cmplwi    %cr1, r_len, 0                /* cr1: is len zero? */
                    li        r_val, 0                      /* the byte to store is 0 */
                    beqlr-    %cr1                          /* empty range: return */
                    b         cb_memset                     /* join memset's zero-fill path */
END(bzero)
61
/*
 * void *memset(void *b, int c, size_t len);
 *
 * In:   %r3 = b, %r4 = c, %r5 = len
 * Out:  %r3 = b
 *
 * Register use after the arguments have been shuffled:
 *   r_dst (%r3)  running destination pointer
 *   r_len (%r4)  bytes still to be filled
 *   r_val (%r0)  low byte of c replicated into all four bytes
 *   %r8          original b; copied back to %r3 on the sf_return and
 *                sf_bytewise exits
 *   %r9          D-cache line size in bytes       (cb_memset path)
 *   %r10         shift count derived from %r9     (cb_memset path)
 *   %r5 - %r7    scratch (%r7 only when USE_STSWX)
 * CTR, %cr0 and %cr1 are overwritten (XER too when USE_STSWX).  The
 * userland PIC variant borrows LR and restores it.
 *
 * bzero branches to cb_memset with r_val = 0 and its length already in
 * r_len.  It does not load %r8, so %r3 is not meaningful when the shared
 * code returns to a bzero caller; bzero is declared void.
 */
ENTRY(memset)
                    cmplwi    %cr1, %r5, 0                  /* cr1: len == 0? */
                    mr.       %r0, %r4                      /* r_val = c; cr0: c == 0? */
                    mr        %r8, %r3                      /* remember b for the return value */
                    beqlr-    %cr1                          /* len == 0: nothing to do */

                    rlwimi    %r0, %r4, 8, 16, 23           /* word extend fill value: byte -> halfword */
                    rlwimi    %r0, %r0, 16, 0, 15           /* ... halfword -> word */
                    mr        %r4, %r5                      /* r_len = len */
                    bne-      simple_fill                   /* c != 0 (cr0 from mr.): dcbz can only */
                                                            /* store zeros, so use the trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
#ifdef __PIC__
                    /* First get cache line size */
                    mflr      %r9                           /* save caller's LR */
                    bcl       20,31,1f                      /* LR = address of 1: */
1:                  mflr      %r10
                    mtlr      %r9                           /* put caller's LR back */
                    addis     %r10,%r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@ha
                    lwz       %r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE-1b@l(%r10)
#else
                    lis       %r10,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@ha
                    lwz       %r9,_libc_powerpc_cache_info+CACHE_INFO_DCACHE_LINE_SIZE@l(%r10)
#endif
                    cmplwi    %cr1, %r9, 0                  /* Unknown? */
                    /*
                     * Branch on %cr1, the field the compare above wrote.
                     * %cr0 is stale here: it is EQ when we fell through from
                     * memset's "mr." and holds whatever the caller left when
                     * we were entered from bzero.  With a line size of 0 the
                     * shift and mask computed below are meaningless.
                     */
                    beq-      %cr1, simple_fill             /* a trivial fill routine */
#else /* _KERNEL */
#ifdef    MULTIPROCESSOR
                    mfsprg    %r10, 0                       /* Get cpu_info pointer */
#else
                    lis       %r10, cpu_info_store@ha
                    addi      %r10, %r10, cpu_info_store@l
#endif
                    lwz       %r9, CPU_CI+CACHE_INFO_DCACHE_LINE_SIZE(%r10)     /* Load D$ line size */
#endif /* _KERNEL */
                    cntlzw    %r10, %r9                     /* Calculate shift.. */
                    li        %r6, 31
                    subf      %r10, %r10, %r6               /* r10 = 31 - clz(CL), i.e. log2(CL) */
                                                            /* when CL is a power of two */
                    /* Back in memory filling business */

                    cmplwi    %cr1, r_len, 0                /* Nothing to do? */
                    add       %r5, %r9, %r9                 /* r5 = 2*CL */
                    cmplw     r_len, %r5                    /* cr0: < 2*CL bytes to move? */
                    beqlr-    %cr1                          /* then do nothing */

                    blt+      simple_fill                   /* a trivial fill routine */

                    /* Word align the block, fill bytewise until dst is word aligned */

                    andi.     %r5, r_dst, 0x03              /* r5 = dst & 3 */
                    li        %r6, 4
                    beq+      cb_aligned_w                  /* already aligned to word? */

                    subf      %r5, %r5, %r6                 /* bytes to fill to align4 */
#if USE_STSWX
                    mtxer     %r5                           /* XER byte count for stswx */
                    stswx     %r0, 0, r_dst
                    add       r_dst, %r5, r_dst
#else
                    mtctr     %r5

                    subi      r_dst, r_dst, 1               /* pre-bias for the update-form store */
1:                  stbu      r_val, 1(r_dst)               /* Fill bytewise */
                    bdnz      1b

                    addi      r_dst, r_dst, 1               /* undo the bias */
#endif
                    subf      r_len, %r5, r_len             /* r_len -= bytes just written */

cb_aligned_w:       /* Cache block align, fill wordwise until dst aligned */

                    /* I know I have something to do since we had >= 2*CL initially */
                    /* so no need to check for r_len = 0 */

                    subi      %r6, %r9, 1                   /* CL mask */
                    and.      %r5, r_dst, %r6               /* r5 = offset into the line; sets cr0 */
                    srwi      %r5, %r5, 2                   /* ... in words */
                    srwi      %r6, %r9, 2                   /* r6 = words per line */
                    beq       cb_aligned_cb                 /* already on CL boundary? (cr0 from and.) */

                    subf      %r5, %r5, %r6                 /* words to fill to alignment */
                    mtctr     %r5
                    slwi      %r5, %r5, 2                   /* back to a byte count */
                    subf      r_len, %r5, r_len

                    subi      r_dst, r_dst, 4               /* pre-bias for the update-form store */
1:                  stwu      r_val, 4(r_dst)               /* Fill wordwise */
                    bdnz      1b
                    addi      r_dst, r_dst, 4               /* undo the bias */

cb_aligned_cb:      /* no need to check r_len, see above */

                    srw.      %r5, r_len, %r10              /* Number of cache blocks */
                    mtctr     %r5
                    beq       cblocks_done                  /* none: skip the dcbz loop */

                    slw       %r5, %r5, %r10                /* bytes covered by whole blocks */
                    subf      r_len, %r5, r_len

1:                  dcbz      0, r_dst                      /* Clear blockwise */
                    add       r_dst, r_dst, %r9
                    bdnz      1b

cblocks_done:       /* still CL aligned, but less than CL bytes left */
                    cmplwi    %cr1, r_len, 0                /* cr1: nothing left? */
                    cmplwi    r_len, 8                      /* cr0: fewer than 8 left? */
                    beq-      %cr1, sf_return

                    blt-      sf_bytewise                   /* <8 remaining? */
                    b         sf_aligned_w

/*----------------------------------------------------------------------*/
/* wbzero is not referenced by any code visible in this file. */
wbzero:             li        r_val, 0

                    cmplwi    r_len, 0
                    beqlr-                                  /* Nothing to do */

simple_fill:        /* entered with r_len != 0 */
#if USE_STSWX
                    cmplwi    %cr1, r_len, 12               /* < 12 bytes to move? */
#else
                    cmplwi    %cr1, r_len, 8                /* < 8 bytes to move? */
#endif
                    andi.     %r5, r_dst, 0x03              /* cr0: dst word aligned?  r5 = dst & 3 */
                    blt       %cr1, sf_bytewise             /* trivial byte mover */

                    li        %r6, 4
                    subf      %r5, %r5, %r6                 /* bytes to fill to align4 */
                    beq+      sf_aligned_w                  /* dest is word aligned (cr0 from andi.) */

#if USE_STSWX
                    mtxer     %r5                           /* XER byte count for stswx */
                    stswx     %r0, 0, r_dst
                    add       r_dst, %r5, r_dst
#else
                    mtctr     %r5                           /* nope, then fill bytewise */
                    subi      r_dst, r_dst, 1               /* until it is */
1:                  stbu      r_val, 1(r_dst)
                    bdnz      1b

                    addi      r_dst, r_dst, 1               /* undo the bias */
#endif
                    subf      r_len, %r5, r_len             /* r_len -= bytes just written */

sf_aligned_w:       /* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
                    mr        %r6, %r0                      /* stswi below stores %r6 and %r7 */
                    mr        %r7, %r0

                    srwi      %r5, r_len, 3                 /* 8-byte chunks to fill */
                    mtctr     %r5

                    slwi      %r5, %r5, 3                   /* adjust len */
                    subf.     r_len, %r5, r_len             /* cr0: any tail bytes left? */

1:                  stswi     %r6, r_dst, 8
                    addi      r_dst, r_dst, 8
                    bdnz      1b
#else
                    srwi      %r5, r_len, 2                 /* words to fill */
                    mtctr     %r5

                    slwi      %r5, %r5, 2
                    subf.     r_len, %r5, r_len             /* adjust len for fill; cr0: tail left? */

                    subi      r_dst, r_dst, 4               /* pre-bias for the update-form store */
1:                  stwu      r_val, 4(r_dst)
                    bdnz      1b
                    addi      r_dst, r_dst, 4               /* undo the bias */
#endif

sf_word_done:       bne-      sf_bytewise                   /* tail bytes remain (cr0 from subf.) */

sf_return:          mr        %r3, %r8                      /* restore orig ptr */
                    blr                                     /* for memset functionality */

sf_bytewise:        /* every branch to here has r_len != 0 */
#if USE_STSWX
                    mr        %r5, %r0                      /* stswx stores from %r5 upwards */
                    mr        %r6, %r0
                    mr        %r7, %r0

                    mtxer     r_len                         /* XER byte count for stswx */
                    stswx     %r5, 0, r_dst
#else
                    mtctr     r_len

                    subi      r_dst, r_dst, 1               /* pre-bias for the update-form store */
1:                  stbu      r_val, 1(r_dst)
                    bdnz      1b
#endif
                    mr        %r3, %r8                      /* restore orig ptr */
                    blr                                     /* for memset functionality */
END(memset)
258
259/*----------------------------------------------------------------------*/
#if !defined(_KERNEL)
/*
 * Userland-only data words.  Neither label is referenced by the code
 * visible in this file, which reads _libc_powerpc_cache_info instead.
 * NOTE(review): presumably left over from an earlier cache-size probe;
 * confirm before removing.
 */
                    .data
                    .p2align  2
cache_info:
                    .long     -1
                    .long     -1
                    .long     -1
                    .long     -1
cache_sh:
                    .long     0

#endif /* !_KERNEL */
267/*----------------------------------------------------------------------*/
268