/* xref: /freebsd-13-stable/lib/libc/amd64/string/strlen.S (revision 3d497e17ebd33fe0f58d773e35ab994d750258d6) */
/*
 * Written by Mateusz Guzik <mjg@freebsd.org>
 * Public domain.
 */

#include <machine/asm.h>

/*
 * Note: this routine was written with kernel use in mind (read: no SIMD);
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

/* Pad to the next 16-byte boundary, filling the gap with nop (0x90). */
#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

/*
 * size_t strlen(const char *string)
 *
 * In:	%rdi = string
 * Out:	%rax = number of bytes preceding the first NUL byte
 * Modifies: %rcx, %rdx, %rdi, %r8, %r9, %r10, %r11 and the flags.
 *	     The stack is not used.
 *
 * The string is scanned one 8-byte aligned word at a time, so a load may
 * cover bytes before the start of the string or after its terminator,
 * but only within the aligned word that contains them.
 *
 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick: the result is
 * non-zero iff the word x contains a zero byte, and its lowest set bit is
 * bit 7 of the lowest-addressed zero byte.  Bits above that one may be
 * set spuriously by borrow propagation; this is harmless because only the
 * lowest set bit is consumed (bsfq).
 *
 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
 * with leaq.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
 *   by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
 */
ENTRY(strlen)
	movabsq	$0xfefefefefefefeff,%r8	/* 0x0 - 0x0101010101010101 */
	movabsq	$0x8080808080808080,%r9	/* bit 7 of every byte */

	movq	%rdi,%r10		/* original pointer, subtracted at the end */
	movq	%rdi,%rcx		/* copy used to derive the shift count */
	testb	$7,%dil
	jz	2f			/* already 8-byte aligned */

	/*
	 * Handle misaligned reads: align to 8 and fill
	 * the spurious bytes (those located before the start of the
	 * string) with 0xff so that they can never be taken for a NUL.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	/*
	 * %cl = 8 * address.  A 64-bit shift only honours the low 6 bits
	 * of %cl, so the effective count is 8 * (address & 7), i.e. the
	 * number of bits occupied by the spurious bytes.
	 */
	shlq	$3,%rcx
	movq	$-1,%rdx
	shlq	%cl,%rdx
	notq	%rdx			/* ones in the spurious low bytes only */
	orq	%rdx,%r11

	/* Zero-byte test on the patched first word. */
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	3f			/* NUL found; %rdi is the aligned base */

	/*
	 * Main loop: %rdi is 8-byte aligned here.  Label 1 advances to the
	 * next word, label 2 is the entry point for an aligned string.
	 */
	ALIGN_TEXT
1:
	leaq	8(%rdi),%rdi
2:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jz	1b
3:
	/*
	 * %rcx has bit 7 set in the first zero byte of the word at %rdi.
	 * Turn the bit index into a byte index and compute
	 * (%rdi + byte index) - original pointer.
	 */
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rcx,%rdi),%rax
	subq	%r10,%rax
	ret
END(strlen)

	/* Empty-flag GNU-stack note: no executable stack is requested. */
	.section .note.GNU-stack,"",%progbits