1/*-
2 * Copyright (c) 2013 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Matt Thomas of 3am Software Foundry.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <machine/asm.h>
31
32RCSID("$NetBSD: strrchr_arm.S,v 1.6 2013/08/25 06:15:06 matt Exp $")
33
34#ifdef __ARMEL__
35#define   BYTE0     0x000000ff
36#define   BYTE1     0x0000ff00
37#define   BYTE2     0x00ff0000
38#define   BYTE3     0xff000000
39#define   lshi      lsl
40#define   lshis     lsls
41#else
42#define   BYTE0     0xff000000
43#define   BYTE1     0x00ff0000
44#define   BYTE2     0x0000ff00
45#define   BYTE3     0x000000ff
46#define   lshi      lsr
47#define   lshis     lsrs
48#endif
49
50ENTRY(strrchr)
51          ands      r2, r1, #0xff                 /* is the byte value NUL? */
52          bne       1f                            /*   no, do it the hard way */
53          push      {r0, lr}            /* save pointer and return addr */
54          bl        PLT_SYM(strlen)               /* get length */
55          pop       {r1, r2}            /* restore pointer / return addr */
56          adds      r0, r0, r1                    /* add pointer to length */
57          RETr(r2)                      /* return */
58
591:        mov       r1, r0                        /* we use r0 at the return value */
60          movs      r0, #0                        /* return NULL by default */
612:        tst       r1, #3                        /* test for word alignment */
62          beq       .Lpre_main_loop               /*   finally word aligned */
63          ldrb      r3, [r1], #1                  /* load a byte */
64          cmp       r3, r2                        /* did it match? */
65#ifdef __thumb__
66          it        eq
67#endif
68          subeq     r0, r1, #1                    /*   yes, remember that it did */
69          cmp       r3, #0                        /* was it NUL? */
70          bne       2b                            /*   no, try next byte */
71          RET                                     /* return */
72.Lpre_main_loop:
73          push      {r4, r5}            /* save some registers */
74#if defined(_ARM_ARCH_7)
75          movw      ip, #0xfefe                   /* magic constant; 254 in each byte */
76          movt      ip, #0xfefe                   /* magic constant; 254 in each byte */
77#elif defined(_ARM_ARCH_6)
78          mov       ip, #0xfe           /* put 254 in low byte */
79          orr       ip, ip, ip, lsl #8  /* move to next byte */
80          orr       ip, ip, ip, lsl #16 /* move to next halfword */
81#endif /* _ARM_ARCH_6 */
82          orr       r2, r2, r2, lsl #8  /* move to next byte */
83          orr       r2, r2, r2, lsl #16 /* move to next halfword */
84.Lmain_loop:
85          ldr       r3, [r1], #4                  /* load next word */
86#if defined(_ARM_ARCH_6)
87          /*
88           * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
89           * instruction.  For every non-NUL byte, the result for that byte will
90           * become 255.  For NUL, it will be 254.  When we complement the
91           * result, if the result is non-0 then we must have encountered a NUL.
92           */
93          uqadd8    r4, r3, ip                    /* NUL detection happens here */
94          usub8     r3, r3, r2                    /* bias for char looked for? */
95          uqadd8    r5, r3, ip                    /* char detection happens here */
96          ands      r3, r4, r5                    /* merge results */
97          mvns      r3, r3                        /* is the complement non-0? */
98          beq       .Lmain_loop                   /*   no, then keep going */
99
100          mvns      r5, r5                        /* get we find any matching bytes? */
101          beq       .Ldone                        /*   no, then we hit the end, return */
102          mvns      r4, r4                        /* did we encounter a NUL? */
103          beq       .Lfind_match                  /*   no, find matching byte */
104          /*
105           * Copy the NUL bit to the following byte lanes.  Then clear any match
106           * bits in those byte lanes to prevent false positives in those bytes.
107           */
108          bics      r5, r5, r4                    /* clear any NUL match bits */
109          beq       .Ldone                        /*   no remaining matches, we're done */
110          lshis     r3, r4, #8                    /* shift up a byte */
111#ifdef __thumb__
112          itt       ne
113#endif
114          orrsne    r3, r3, r3, lshi #8 /* if non 0, copy up to next byte */
115          orrsne    r3, r3, r3, lshi #8 /* if non 0, copy up to last byte */
116          bics      r5, r5, r3                    /* clear match bits */
117          beq       .Ldone                        /*   no remaining matches, we're done */
118.Lfind_match:
119#ifdef __ARMEL__
120          rev       r5, r5                        /* we want this in BE for the CLZ */
121#endif
122          /*
123           * If we have multiple matches, we want to the select the "last" match
124           * in the word which will be the lowest bit set.
125           */
126          subs      r3, r5, #1                    /* subtract 1 */
127          ands      r3, r3, r5                    /* and with mask */
128          eors      r5, r5, r3                    /* only have the lowest bit set left */
129          clz       r5, r5                        /* count how many leading zeros */
130          add       r0, r1, r5, lsr #3  /* divide that by 8 and add to count */
131          subs      r0, r0, #4                    /* compensate for the post-inc */
132          cmp       r4, #0                        /* did we read any NULs? */
133          beq       .Lmain_loop                   /*   no, get next word */
134#else
135          /*
136           * No fancy shortcuts so just test each byte lane for a NUL.
137           * (other tests for NULs in a word take more instructions/cycles).
138           */
139          eor       r4, r3, r2                    /* xor .. */
140          tst       r3, #BYTE0                    /* is byte 0 a NUL? */
141          beq       .Ldone                        /*   yes, then we're done */
142          tst       r4, #BYTE0                    /* is byte 0 a match? */
143          subeq     r0, r1, #4                    /*   yes, remember its location */
144          tst       r3, #BYTE1                    /* is byte 1 a NUL? */
145          beq       .Ldone                        /*   yes, then we're done */
146          tst       r4, #BYTE1                    /* is byte 1 a match? */
147          subeq     r0, r1, #3                    /*   yes, remember its location */
148          tst       r3, #BYTE2                    /* is byte 2 a NUL? */
149          beq       .Ldone                        /*   yes, then we're done */
150          tst       r4, #BYTE2                    /* is byte 2 a match? */
151          subeq     r0, r1, #2                    /*   yes, remember its location */
152          tst       r3, #BYTE3                    /* is byte 3 a NUL? */
153          beq       .Ldone                        /*   yes, then we're done */
154          tst       r4, #BYTE3                    /* is byte 3 a match? */
155          subeq     r0, r1, #1                    /*   yes, remember its location */
156          b         .Lmain_loop
157#endif /* _ARM_ARCH_6 */
158.Ldone:
159          pop       {r4, r5}
160          RET
161END(strrchr)
162