1/*
2 * Written by J.T. Conklin <jtc@acorntoolworks.com>
3 * Public domain.
4 */
5
6#include <machine/asm.h>
7
8#if defined(LIBC_SCCS)
9          RCSID("$NetBSD: strcpy.S,v 1.2 2014/03/22 19:16:34 jakllsch Exp $")
10#endif
11
12/*
13 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word boundary; it then copies by
15 * words until it finds a word containing a zero byte, and finally
16 * copies by bytes until the end of the string is reached.
17 *
18 * While this may result in unaligned stores if the source and
19 * destination pointers are unaligned with respect to each other,
20 * it is still faster than either byte copies or the overhead of
21 * an implementation suitable for machines with strict alignment
22 * requirements.
23 */
24
ENTRY(strcpy)
	/*
	 * Copy the NUL-terminated string at %rsi to %rdi, terminator
	 * included, and hand back the destination pointer as it was on
	 * entry.
	 *
	 * Register roles:
	 *   %rax  destination pointer as passed in (return value)
	 *   %rdi  destination cursor
	 *   %rsi  source cursor
	 *   %rdx  byte, or 8-byte word, most recently read from the source
	 *   %rcx  scratch for the zero-byte probe
	 *   %r8   0x01 replicated into every byte
	 *   %r9   0x80 replicated into every byte
	 *
	 * Clobbers %rcx, %rdx, %rsi, %rdi, %r8, %r9 and the flags.
	 */
	movabsq	$0x8080808080808080,%r9
	movabsq	$0x0101010101010101,%r8
	movq	%rdi,%rax

	/*
	 * Phase 1: move single bytes until the source cursor is a
	 * multiple of 8.  If the terminator is copied on the way, the
	 * string is complete and we return straight from here.
	 */
	_ALIGN_TEXT
.Lalign_src:
	testb	$7,%sil
	jz	.Lload_word
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jnz	.Lalign_src
	ret

	/*
	 * Phase 2: move 8 bytes per iteration.  The loop is entered at
	 * .Lload_word; a word is only written out (.Lstore_word) once
	 * the probe
	 *
	 *	(word - 0x0101010101010101) & 0x8080808080808080
	 *
	 * has come back zero.  The probe is non-zero for every word
	 * that holds a zero byte, but it is also non-zero for words
	 * holding a byte in the range 0x81..0xff, so leaving the loop
	 * does not yet prove that the terminator has been found.  The
	 * destination may be unaligned here; only the source is aligned.
	 */
	_ALIGN_TEXT
.Lstore_word:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lload_word:
	movq	(%rsi),%rdx
	addq	$8,%rsi			/* source cursor now past this word */
	movq	%rdx,%rcx
	subq	%r8,%rcx
	testq	%r9,%rcx
	jz	.Lstore_word

	/*
	 * Phase 3: the probe fired.  Write the word out one byte at a
	 * time, lowest byte first, stopping after the first zero byte.
	 * Each pass below stores the low byte of %rdx, tests it, and
	 * then shifts the next byte down into %dl; this covers bytes
	 * 1 through 7 of the word.
	 */
	.rept	7
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jz	.Lreturn
	shrq	$8,%rdx
	.endr

	/*
	 * Byte 8.  If it is non-zero as well, the probe was a false
	 * alarm: all eight bytes have been stored and both cursors
	 * have moved on by 8, so carry on with the word loop.
	 */
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jnz	.Lload_word

.Lreturn:
	ret
END(strcpy)
113