1/*        $NetBSD: copy.S,v 1.36 2024/09/24 20:19:30 andvar Exp $     */
2
3/*
4 * Copyright (c) 2001 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Frank van der Linden for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "assym.h"
39
40#include <sys/errno.h>
41#include <sys/syscall.h>
42
43#include <machine/asm.h>
44#include <machine/frameasm.h>
45
/*
 * GET_CURPCB(reg)
 *
 * Load CPUVAR(CURLWP) into reg, then overwrite reg with the quadword
 * found at offset L_PCB from that pointer.  Only reg is written.
 */
#define GET_CURPCB(reg)				  \
	movq	CPUVAR(CURLWP),reg		; \
	movq	L_PCB(reg),reg
49
/*
 * DEFERRED_SWITCH_CHECK
 *
 * Placed at the top of a copy function.  It evaluates
 * CHECK_DEFERRED_SWITCH and, when the result is non-zero, branches
 * forward to local label 99, which DEFERRED_SWITCH_CALL emits after
 * the function body.  Local label 98 marks the resume point.
 *
 * The layout is deliberate: the abnormal case is a forwards conditional
 * branch, which both Intel and AMD processors predict as not-taken.
 */
#define DEFERRED_SWITCH_CHECK			  \
	CHECK_DEFERRED_SWITCH			; \
	jnz	99f				; \
98:
59
/*
 * DEFERRED_SWITCH_CALL
 *
 * Out-of-line half of DEFERRED_SWITCH_CHECK.  Defines local label 99
 * (the target of that macro's jnz), calls do_pmap_load() and jumps
 * back to local label 98.  It must be placed after the function's
 * final ret so that it is never reached by fall-through.
 */
#define DEFERRED_SWITCH_CALL			  \
99:						; \
	call	_C_LABEL(do_pmap_load)		; \
	jmp	98b
64
/*
 * The primitives below copy regions of memory.
 *
 * x86_copyfunc_start marks the beginning of that code; the label must
 * stay ahead of every copy function.
 */
	.text

	.globl	x86_copyfunc_start
x86_copyfunc_start:
72
/*
 * do_pmap_load: handle a deferred pmap switch for the copy functions
 * (reached through DEFERRED_SWITCH_CALL).
 *
 * Preemption has to be re-enabled here without making a function call,
 * so that the program counter remains visible to cpu_kpreempt_exit().
 * That routine can then tell that a preemption happened inside one of
 * the copy functions and that the pmap must be restored on return.
 *
 * Register contract, as implemented below:
 *	%rdi, %rsi, %rdx, %rcx	saved on entry and restored on exit
 *	%rbx			holds curlwp while running; restored on exit
 *	%rbp			used as frame pointer; restored by leave
 */
ENTRY(do_pmap_load)
	pushq	%rbp
	movq	%rsp,%rbp

	/* Preserve the caller's first four argument registers and %rbx. */
	pushq	%rdi
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%rbx

	movq	CPUVAR(CURLWP),%rbx		/* %rbx = curlwp */

.Ldo_pmap_load_again:
	/* Bump the lwp's no-preempt count around the pmap_load() call. */
	incl	L_NOPREEMPT(%rbx)
	call	_C_LABEL(pmap_load)
	decl	L_NOPREEMPT(%rbx)
	jnz	.Ldo_pmap_load_recheck		/* count still non-zero */

	/* Count dropped to zero: run kpreempt(0) if one was requested. */
	cmpl	$0,L_DOPREEMPT(%rbx)
	jz	.Ldo_pmap_load_recheck
	xorl	%edi,%edi			/* first argument = 0 */
	call	_C_LABEL(kpreempt)

.Ldo_pmap_load_recheck:
	/* Go round again for as long as a pmap load is still wanted. */
	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
	jnz	.Ldo_pmap_load_again

	/* Unwind in the reverse order of the pushes above. */
	popq	%rbx
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	leave
	ret
END(do_pmap_load)
109
110/*
111 * Copy routines from and to userland, plus a few more. See the
112 * section 9 manpages for info. Some cases can be optimized more.
113 *
114 * I wonder if it's worthwhile to make these use SSE2 registers?
115 * (dsl) Not from info I've read from the AMD guides.
116 *
117 * Also note that the setup time for 'rep movs' is horrid - especially on P4
118 * netburst - but on my AMD X2 it manages one copy (read+write) per clock
119 * which can be achieved with a code loop, but is probably impossible to beat.
120 * However the use of 'rep movsb' for the final bytes should be killed.
121 *
122 * Newer Intel cpus have a much lower setup time, and may (someday)
123 * be able to do cache-line size copies....
124 */
125
/*
 * int kcopy(const void *from, void *to, size_t len);
 *
 * Copy len bytes from and to kernel memory, and abort on fault.
 *
 * In:	%rdi = from, %rsi = to, %rdx = len
 * Out:	%eax = 0 when the copy runs to completion
 *
 * The code between .Lkcopy_start and .Lkcopy_end is listed in
 * onfault_table with kcopy_fault as its handler.
 */
ENTRY(kcopy)
	xchgq	%rdi,%rsi		/* %rsi = from, %rdi = to */
	movq	%rdx,%rcx		/* %rcx = len; %rdx keeps a copy */
.Lkcopy_start:
	movq	%rdi,%rax
	subq	%rsi,%rax		/* %rax = to - from */
	cmpq	%rcx,%rax
	jb	.Lkcopy_backward	/* (to - from) < len, unsigned: overlap */

	/* Forward copy: whole 64-bit words first, then the 0..7 byte tail. */
	shrq	$3,%rcx
	rep movsq

	movl	%edx,%ecx
	andl	$7,%ecx			/* len % 8 */
	rep movsb

	xorl	%eax,%eax
	ret

/*
 * Backward copy, used when the destination starts inside the source.
 *
 * 'rep movs' is slower backwards than forwards and should not be used
 * unless the regions really overlap.  The original author notes that
 * kcopy() does not appear to be used anywhere performance-critical, and
 * that it is unclear whether it is ever asked to do overlapping copies.
 */
.Lkcopy_backward:
	leaq	-1(%rdi,%rcx),%rdi	/* %rdi = last byte of destination */
	leaq	-1(%rsi,%rcx),%rsi	/* %rsi = last byte of source */
	std				/* string ops now run downwards */
	andl	$7,%ecx			/* fractional bytes first */
	rep movsb

	movq	%rdx,%rcx		/* then the rest as 64-bit words */
	shrq	$3,%rcx
	subq	$7,%rsi			/* step back to the start of the */
	subq	$7,%rdi			/* highest remaining quadword */
	rep movsq
	cld				/* restore the forward direction */
.Lkcopy_end:
	xorl	%eax,%eax
	ret
END(kcopy)
177
/*
 * copyout: copy a block from kernel memory out to user memory.
 *
 * In:	%rdi = kernel source, %rsi = user destination, %rdx = length
 * Out:	%eax = 0 on completion; EFAULT (via copy_efault) when the
 *	destination range wraps or ends above VM_MAXUSER_ADDRESS.
 *
 * The copy itself, .Lcopyout_start-.Lcopyout_end, is listed in
 * onfault_table with copy_fault as its handler.
 */
ENTRY(copyout)
	DEFERRED_SWITCH_CHECK

	movq	%rdx,%rax		/* %rax = transfer length in bytes */
	xchgq	%rdi,%rsi		/* %rsi = kernel addr, %rdi = user addr */

	/* Validate the user range [%rdi, %rdi + len). */
	addq	%rdi,%rdx		/* %rdx = end address */
	jc	_C_LABEL(copy_efault)	/* address arithmetic wrapped */
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	/* end lies in kernel space */

	SMAP_DISABLE
.Lcopyout_start:
	movq	%rax,%rcx
	shrq	$3,%rcx			/* number of 8-byte words */
	rep movsq			/* %rsi -> %rdi, word at a time */
	movl	%eax,%ecx
	andl	$7,%ecx			/* bytes left over */
	rep movsb
.Lcopyout_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(copyout)
207
/*
 * copyin: copy a block from user memory into kernel memory.
 *
 * In:	%rdi = user source, %rsi = kernel destination, %rdx = length
 * Out:	%eax = 0 on completion; EFAULT (via copy_efault) when the
 *	source range wraps or ends above VM_MAXUSER_ADDRESS.
 *
 * The copy itself, .Lcopyin_start-.Lcopyin_end, is listed in
 * onfault_table with copy_fault as its handler.
 */
ENTRY(copyin)
	DEFERRED_SWITCH_CHECK

	movq	%rdx,%rax		/* %rax = transfer length in bytes */
	xchgq	%rdi,%rsi		/* %rsi = user addr, %rdi = kernel addr */

	/* Validate the user range [%rsi, %rsi + len). */
	addq	%rsi,%rdx		/* %rdx = end address */
	jc	_C_LABEL(copy_efault)	/* source address wrapped */
	movq	$VM_MAXUSER_ADDRESS,%r8
	cmpq	%r8,%rdx
	ja	_C_LABEL(copy_efault)	/* end lies in kernel space */

	SMAP_DISABLE
.Lcopyin_start:
	/* Equivalent of bcopy(%rsi, %rdi, %rax). */
	movq	%rax,%rcx
	shrq	$3,%rcx			/* number of 8-byte words */
	rep movsq
	movl	%eax,%ecx
	andl	$7,%ecx			/* bytes left over */
	rep movsb
.Lcopyin_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(copyin)
238
/*
 * copy_efault: shared failure exit for the range checks in copyin,
 * copyout and the _ufetch_N / _ustore_N routines.  Those branch here
 * before touching user memory; it returns EFAULT to their caller.
 */
ENTRY(copy_efault)
	movl	$EFAULT,%eax
	ret
END(copy_efault)
243
/*
 * kcopy_fault: handler named in onfault_table for the kcopy range.
 *
 * kcopy's backward path runs with the direction flag set, so clear it
 * before returning.  %rax is left untouched here; presumably the code
 * that transfers control here has already loaded the error value
 * (not visible in this file).
 */
ENTRY(kcopy_fault)
	cld				/* undo a possible std from kcopy */
	ret
END(kcopy_fault)
248
/*
 * copy_fault: handler named in onfault_table for copyin, copyout and
 * the _ufetch_N / _ustore_N ranges.
 *
 * Each of those ranges is entered right after SMAP_DISABLE, so the
 * handler issues the matching SMAP_ENABLE before returning.  %rax is
 * left untouched here; presumably it already holds the error value
 * when control arrives (not visible in this file).
 */
ENTRY(copy_fault)
	SMAP_ENABLE
	ret
END(copy_fault)
253
/*
 * copyoutstr: copy a NUL-terminated string from kernel memory out to
 * user memory, at most a given number of bytes.
 *
 * In:	%rdi = kernel source, %rsi = user destination,
 *	%rdx = maximum length, %rcx = where to store the number of
 *	bytes copied (may be NULL)
 * Out:	%eax = 0 once the NUL byte has been copied, ENAMETOOLONG when
 *	the limit is reached first, EFAULT when the user address range
 *	is exhausted.  All exits go through copystr_return, which
 *	stores the copied length when %rcx was non-NULL.
 *
 * Register roles after the prologue:
 *	%rsi = source, %rdi = destination, %rdx = bytes still allowed,
 *	%r8 = effective limit, %r9 = length-out pointer
 */
ENTRY(copyoutstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Clamp the limit to min(%rdx, VM_MAXUSER_ADDRESS - %rdi) so the
	 * loop can never store beyond the end of user space.
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rdi,%rax
	jc	_C_LABEL(copystr_efault)	/* %rdi already past the limit */
	cmpq	%rdx,%rax
	cmovb	%rax,%rdx			/* room < limit: shrink both */
	cmovb	%rax,%r8
	incq	%rdx				/* the loop pre-decrements */

	SMAP_DISABLE
.Lcopyoutstr_start:
2:	decq	%rdx
	jz	3f				/* limit used up */
	lodsb
	stosb
	testb	%al,%al
	jnz	2b				/* keep going until NUL */
.Lcopyoutstr_end:
	SMAP_ENABLE

	/* Success -- the NUL byte was copied. */
	decq	%rdx
	xorl	%eax,%eax
	jmp	copystr_return

3:	/* %rdx hit zero -- decide between EFAULT and ENAMETOOLONG. */
	SMAP_ENABLE
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rdi
	jae	_C_LABEL(copystr_efault)	/* stopped at end of user space */
	movl	$ENAMETOOLONG,%eax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL
END(copyoutstr)
297
/*
 * copyinstr: copy a NUL-terminated string from user memory into kernel
 * memory, at most a given number of bytes.
 *
 * In:	%rdi = user source, %rsi = kernel destination,
 *	%rdx = maximum length, %rcx = where to store the number of
 *	bytes copied (may be NULL)
 * Out:	%eax = 0 once the NUL byte has been copied, ENAMETOOLONG when
 *	the limit is reached first, EFAULT when the user address range
 *	is exhausted.  All exits go through copystr_return, which
 *	stores the copied length when %rcx was non-NULL.
 *
 * Register roles after the prologue:
 *	%rsi = source, %rdi = destination, %rdx = bytes still allowed,
 *	%r8 = effective limit, %r9 = length-out pointer
 */
ENTRY(copyinstr)
	DEFERRED_SWITCH_CHECK
	xchgq	%rdi,%rsi
	movq	%rdx,%r8
	movq	%rcx,%r9

	/*
	 * Clamp the limit to min(%rdx, VM_MAXUSER_ADDRESS - %rsi) so the
	 * loop can never load from beyond the end of user space.
	 */
	movq	$VM_MAXUSER_ADDRESS,%rax
	subq	%rsi,%rax
	jc	_C_LABEL(copystr_efault)	/* %rsi already past the limit */
	cmpq	%rdx,%rax
	cmovb	%rax,%rdx			/* room < limit: shrink both */
	cmovb	%rax,%r8
	incq	%rdx				/* the loop pre-decrements */

	SMAP_DISABLE
.Lcopyinstr_start:
2:	decq	%rdx
	jz	3f				/* limit used up */
	lodsb
	stosb
	testb	%al,%al
	jnz	2b				/* keep going until NUL */
.Lcopyinstr_end:
	SMAP_ENABLE

	/* Success -- the NUL byte was copied. */
	decq	%rdx
	xorl	%eax,%eax
	jmp	copystr_return

3:	/* %rdx hit zero -- decide between EFAULT and ENAMETOOLONG. */
	SMAP_ENABLE
	movq	$VM_MAXUSER_ADDRESS,%r11
	cmpq	%r11,%rsi
	jae	_C_LABEL(copystr_efault)	/* stopped at end of user space */
	movl	$ENAMETOOLONG,%eax
	jmp	copystr_return
	DEFERRED_SWITCH_CALL
END(copyinstr)
341
/*
 * copystr_efault: EFAULT exit for copyinstr and copyoutstr.  Loads the
 * error code and joins copystr_return, which stores the copied length
 * (%r8 - %rdx) when %r9 is non-NULL.
 */
ENTRY(copystr_efault)
	movl	$EFAULT,%eax
	jmp	copystr_return
END(copystr_efault)
346
/*
 * copystr_fault: handler named in onfault_table for the copyinstr and
 * copyoutstr loops.  Those loops run after SMAP_DISABLE, so issue the
 * matching SMAP_ENABLE and then fall into the common return path.
 *
 * copystr_return: common exit for the string copy routines.
 *	%r9  = pointer for the copied length, or NULL
 *	%r8  = effective limit
 *	%rdx = bytes of that limit left unused
 *	%eax = return value, passed through unchanged
 */
ENTRY(copystr_fault)
	SMAP_ENABLE
copystr_return:
	testq	%r9,%r9
	jz	1f			/* caller did not ask for the length */
	subq	%rdx,%r8		/* %r8 = bytes copied */
	movq	%r8,(%r9)
1:	ret
END(copystr_fault)
357
358/**************************************************************************/
359
/*
 * UFETCHSTORE_PROLOGUE(x)
 *
 * Common address check for the _ufetch_N / _ustore_N routines, which
 * pass the access width in bytes (1, 2, 4 or 8) as x.  Branches to
 * copy_efault when %rdi is above VM_MAXUSER_ADDRESS - x.
 * Clobbers %r11 and the flags.
 */
#define	UFETCHSTORE_PROLOGUE(x)				  \
	movq	$VM_MAXUSER_ADDRESS-x,%r11		; \
	cmpq	%r11,%rdi				; \
	ja	_C_LABEL(copy_efault)
364
/* LINTSTUB: int _ufetch_8(const uint8_t *uaddr, uint8_t *valp); */
/*
 * Fetch one byte from user address %rdi and store it at *%rsi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user load is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ufetch_8)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(1)

	SMAP_DISABLE
.L_ufetch_8_start:
	movb	(%rdi),%al		/* the only user-memory access */
.L_ufetch_8_end:
	SMAP_ENABLE

	movb	%al,(%rsi)		/* *valp = fetched value */
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_8)
381
/* LINTSTUB: int _ufetch_16(const uint16_t *uaddr, uint16_t *valp); */
/*
 * Fetch a 16-bit value from user address %rdi and store it at *%rsi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user load is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ufetch_16)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(2)

	SMAP_DISABLE
.L_ufetch_16_start:
	movw	(%rdi),%ax		/* the only user-memory access */
.L_ufetch_16_end:
	SMAP_ENABLE

	movw	%ax,(%rsi)		/* *valp = fetched value */
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_16)
398
/* LINTSTUB: int _ufetch_32(const uint32_t *uaddr, uint32_t *valp); */
/*
 * Fetch a 32-bit value from user address %rdi and store it at *%rsi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user load is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ufetch_32)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(4)

	SMAP_DISABLE
.L_ufetch_32_start:
	movl	(%rdi),%eax		/* the only user-memory access */
.L_ufetch_32_end:
	SMAP_ENABLE

	movl	%eax,(%rsi)		/* *valp = fetched value */
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_32)
415
/* LINTSTUB: int _ufetch_64(const uint64_t *uaddr, uint64_t *valp); */
/*
 * Fetch a 64-bit value from user address %rdi and store it at *%rsi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user load is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ufetch_64)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(8)

	SMAP_DISABLE
.L_ufetch_64_start:
	movq	(%rdi),%rax		/* the only user-memory access */
.L_ufetch_64_end:
	SMAP_ENABLE

	movq	%rax,(%rsi)		/* *valp = fetched value */
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ufetch_64)
432
/* LINTSTUB: int _ustore_8(uint8_t *uaddr, uint8_t val); */
/*
 * Store the low byte of %rsi at user address %rdi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user store is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ustore_8)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(1)

	SMAP_DISABLE
.L_ustore_8_start:
	movb	%sil,(%rdi)		/* the only user-memory access */
.L_ustore_8_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_8)
448
/* LINTSTUB: int _ustore_16(uint16_t *uaddr, uint16_t val); */
/*
 * Store the low 16 bits of %rsi at user address %rdi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user store is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ustore_16)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(2)

	SMAP_DISABLE
.L_ustore_16_start:
	movw	%si,(%rdi)		/* the only user-memory access */
.L_ustore_16_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_16)
464
/* LINTSTUB: int _ustore_32(uint32_t *uaddr, uint32_t val); */
/*
 * Store the low 32 bits of %rsi at user address %rdi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user store is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ustore_32)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(4)

	SMAP_DISABLE
.L_ustore_32_start:
	movl	%esi,(%rdi)		/* the only user-memory access */
.L_ustore_32_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_32)
480
/* LINTSTUB: int _ustore_64(uint64_t *uaddr, uint64_t val); */
/*
 * Store the 64-bit value in %rsi at user address %rdi.
 * Returns 0, or EFAULT (via copy_efault) when the address check fails.
 * The user store is listed in onfault_table with copy_fault as handler.
 */
ENTRY(_ustore_64)
	DEFERRED_SWITCH_CHECK
	UFETCHSTORE_PROLOGUE(8)

	SMAP_DISABLE
.L_ustore_64_start:
	movq	%rsi,(%rdi)		/* the only user-memory access */
.L_ustore_64_end:
	SMAP_ENABLE

	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ustore_64)
496
497/**************************************************************************/
498
/*
 * Compare-and-swap a 64-bit integer in user space.
 *
 * int	_ucas_64(volatile uint64_t *uptr, uint64_t old, uint64_t new,
 *		    uint64_t *ret);
 *
 * In:	%rdi = uptr, %rsi = old, %rdx = new, %rcx = ret
 * Out:	%eax = 0 with the value observed at *uptr stored in *ret, or
 *	EFAULT (via ucas_efault) when uptr is above
 *	VM_MAXUSER_ADDRESS - 8.
 *
 * The locked cmpxchg is listed in onfault_table with ucas_fault as
 * its handler.
 */
ENTRY(_ucas_64)
	DEFERRED_SWITCH_CHECK

	/* Refuse addresses that reach into kernel space. */
	movq	$VM_MAXUSER_ADDRESS-8,%r8
	cmpq	%r8,%rdi
	ja	_C_LABEL(ucas_efault)

	movq	%rsi,%rax		/* cmpxchg compares against %rax */

	SMAP_DISABLE
.Lucas64_start:
	lock cmpxchgq %rdx,(%rdi)	/* the CAS itself */
.Lucas64_end:
	SMAP_ENABLE

	/* %rax now holds the value that was found at *uptr. */
	movq	%rax,(%rcx)
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ucas_64)
530
/*
 * Compare-and-swap a 32-bit integer in user space.
 *
 * int	_ucas_32(volatile uint32_t *uptr, uint32_t old, uint32_t new,
 *		    uint32_t *ret);
 *
 * In:	%rdi = uptr, %esi = old, %edx = new, %rcx = ret
 * Out:	%eax = 0 with the value observed at *uptr stored in *ret, or
 *	EFAULT (via ucas_efault) when uptr is above
 *	VM_MAXUSER_ADDRESS - 4.
 *
 * The locked cmpxchg is listed in onfault_table with ucas_fault as
 * its handler.
 */
ENTRY(_ucas_32)
	DEFERRED_SWITCH_CHECK

	/* Refuse addresses that reach into kernel space. */
	movq	$VM_MAXUSER_ADDRESS-4,%r8
	cmpq	%r8,%rdi
	ja	_C_LABEL(ucas_efault)

	movl	%esi,%eax		/* cmpxchg compares against %eax */

	SMAP_DISABLE
.Lucas32_start:
	lock cmpxchgl %edx,(%rdi)	/* the CAS itself */
.Lucas32_end:
	SMAP_ENABLE

	/* %eax now holds the value that was found at *uptr. */
	movl	%eax,(%rcx)
	xorl	%eax,%eax
	ret
	DEFERRED_SWITCH_CALL
END(_ucas_32)
560
/*
 * ucas_efault: failure exit for the address checks in _ucas_32 and
 * _ucas_64.  Reached before user memory is touched; returns EFAULT.
 */
ENTRY(ucas_efault)
	movl	$EFAULT,%eax
	ret
END(ucas_efault)
565
/*
 * ucas_fault: handler named in onfault_table for the _ucas_32 and
 * _ucas_64 ranges.  Both ranges run after SMAP_DISABLE, so issue the
 * matching SMAP_ENABLE before returning.  %rax is left untouched here;
 * presumably it already holds the error value when control arrives
 * (not visible in this file).
 */
ENTRY(ucas_fault)
	SMAP_ENABLE
	ret
END(ucas_fault)
570
/*
 * x86_copyfunc_end marks the end of the copy code; the label must stay
 * behind every copy function.
 */
	.globl	x86_copyfunc_end
x86_copyfunc_end:
575
/*
 * Fault table of copy functions for trap().
 *
 * Each entry is three quadwords -- start label, end label, handler --
 * and a single zero quadword terminates the table.  How trap() treats
 * the two boundary addresses is not visible in this file.
 */
	.section ".rodata"
	.globl _C_LABEL(onfault_table)

_C_LABEL(onfault_table):
	/* Block copies. */
	.quad	.Lcopyin_start, .Lcopyin_end, _C_LABEL(copy_fault)
	.quad	.Lcopyout_start, .Lcopyout_end, _C_LABEL(copy_fault)
	.quad	.Lkcopy_start, .Lkcopy_end, _C_LABEL(kcopy_fault)

	/* String copies. */
	.quad	.Lcopyoutstr_start, .Lcopyoutstr_end, _C_LABEL(copystr_fault)
	.quad	.Lcopyinstr_start, .Lcopyinstr_end, _C_LABEL(copystr_fault)

	/* Compare-and-swap. */
	.quad	.Lucas64_start, .Lucas64_end, _C_LABEL(ucas_fault)
	.quad	.Lucas32_start, .Lucas32_end, _C_LABEL(ucas_fault)

	/* Single-value fetches. */
	.quad	.L_ufetch_8_start, .L_ufetch_8_end, _C_LABEL(copy_fault)
	.quad	.L_ufetch_16_start, .L_ufetch_16_end, _C_LABEL(copy_fault)
	.quad	.L_ufetch_32_start, .L_ufetch_32_end, _C_LABEL(copy_fault)
	.quad	.L_ufetch_64_start, .L_ufetch_64_end, _C_LABEL(copy_fault)

	/* Single-value stores. */
	.quad	.L_ustore_8_start, .L_ustore_8_end, _C_LABEL(copy_fault)
	.quad	.L_ustore_16_start, .L_ustore_16_end, _C_LABEL(copy_fault)
	.quad	.L_ustore_32_start, .L_ustore_32_end, _C_LABEL(copy_fault)
	.quad	.L_ustore_64_start, .L_ustore_64_end, _C_LABEL(copy_fault)

	.quad	0	/* terminate */

	.text
646