1 | /* Optimized version of the standard strlen() function. |
2 | This file is part of the GNU C Library. |
3 | Copyright (C) 2000-2022 Free Software Foundation, Inc. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | /* Return: the length of the input string |
20 | |
21 | Input: |
22 | in0: str |
23 | |
24 | Look for the null character byte by byte, until we reach a word aligned |
25 | address, then search word by word, using the czx instruction. We're |
26 | also doing one word of read ahead, which could cause problems if the |
27 | null character is on the last word of a page and the next page is not |
28 | mapped in the process address space. Hence the use of the speculative |
29 | load. |
30 | |
31 | This implementation assumes little endian mode. For big endian mode, |
32 | the instruction czx1.r should be replaced by czx1.l. */ |
33 | |
34 | #include <sysdep.h> |
35 | #undef ret |
36 | |
/* Symbolic names for the registers used by strlen below.
   saved_lc - copy of ar.lc taken on entry and written back before returning
   str      - running read pointer (post-incremented by every load)
   pos0     - result of czx1.r on the current word; 8 is treated as
              "no zero byte in this word"
   val1     - the 8-byte word currently being examined
   val2     - byte loaded by the byte loop, and the read-ahead word in the
              word loop
   origadd  - value of str when the word loop starts
   tmp      - scratch: str % 8 on entry, later str - origadd
   loopcnt  - iteration count that is loaded into ar.lc for the byte loop
   len      - running length; lives in ret0, so it is also the return value
   NOTE(review): r18-r30 are presumably caller-clobbered scratch registers,
   since none of them is saved here - confirm against the IA-64 software
   conventions.  */
37 | #define saved_lc r18 |
38 | #define str r19 |
39 | #define pos0 r20 |
40 | #define val1 r21 |
41 | #define val2 r22 |
42 | #define origadd r23 |
43 | #define tmp r24 |
44 | #define loopcnt r30 |
45 | #define len ret0 |
46 | |
/* strlen: return in len (ret0) the number of bytes that precede the first
   zero byte of the string whose address is passed in in0.

   Phases:
     1. If in0 is not 8-byte aligned, scan byte by byte (.l1) up to the next
        8-byte boundary, counting the bytes in len.
     2. From the aligned address, scan 8 bytes at a time (.l2): test the
        current word with czx1.r while speculatively loading the next one.
     3. .foundit turns the pointer progress plus the byte index reported by
        czx1.r into the final length.

   ar.lc is saved on entry and restored on the single exit path
   (.restore_and_exit).  */
47 | ENTRY(strlen) |
48 | .prologue |
/* Register frame: one input (in0), no locals, outputs or rotating
   registers; the previous ar.pfs is kept in r2.  */
49 | alloc r2 = ar.pfs, 1, 0, 0, 0 |
/* Unwind annotation: ar.lc is preserved in saved_lc.  */
50 | .save ar.lc, saved_lc |
51 | mov saved_lc = ar.lc // save the loop counter |
52 | .body |
/* str = running pointer, len = 0, tmp = offset of the input pointer within
   its 8-byte word.  */
53 | mov str = in0 |
54 | mov len = r0 // len = 0 |
55 | and tmp = 7, in0 // tmp = str % 8 |
56 | ;; |
/* loopcnt = number of bytes up to the next 8-byte boundary (only meaningful
   on the unaligned path).  If the pointer is already aligned (tmp == 0),
   skip the byte loop entirely.  */
57 | sub loopcnt = 8, tmp // loopcnt = 8 - tmp |
58 | cmp.eq p6, p0 = tmp, r0 |
59 | (p6) br.cond.sptk .str_aligned;; |
/* ar.lc gets loopcnt - 1: br.cloop branches back while ar.lc != 0
   (decrementing it), so the loop body runs ar.lc + 1 times.  */
60 | adds loopcnt = -1, loopcnt;; |
61 | mov ar.lc = loopcnt |
/* Byte loop: load one byte and advance str.  A zero byte ends the scan with
   len already equal to the number of preceding bytes; otherwise count the
   byte and continue.  Falling out of the loop leaves str on an 8-byte
   boundary.  */
62 | .l1: |
63 | ld1 val2 = [str], 1 |
64 | ;; |
65 | cmp.eq p6, p0 = val2, r0 |
66 | (p6) br.cond.spnt .restore_and_exit |
67 | adds len = 1, len |
68 | br.cloop.dptk .l1 |
/* Word scan set-up: remember the aligned start address and load the first
   word (str is post-incremented by 8).  */
69 | .str_aligned: |
70 | mov origadd = str // origadd = orig |
71 | ld8 val1 = [str], 8;; |
/* NOTE(review): the two nop.b presumably pad the bundle so that .l2 starts
   where intended - confirm before touching them.  */
72 | nop.b 0 |
73 | nop.b 0 |
/* Word loop.  On entry val1 is the word being examined and str addresses the
   word after it.  That next word is fetched with a speculative load so that
   reading ahead of the word which may hold the terminator does not fault
   here; afterwards str is 16 bytes past the address val1 was loaded from.  */
74 | .l2: ld8.s val2 = [str], 8 // don't bomb out here |
75 | czx1.r pos0 = val1 |
76 | ;; |
/* pos0 != 8 means a zero byte was located in val1; pos0 is then used as its
   byte index within the word (see .foundit).  */
77 | cmp.ne p6, p0 = 8, pos0 |
78 | (p6) br.cond.spnt .foundit |
/* No zero byte in val1, so val2 is really needed: if the speculative load
   did not deliver a usable value, redo it for real in .recovery.  */
79 | chk.s val2, .recovery |
/* Advance: the read-ahead word becomes the current word.  */
80 | .back: |
81 | mov val1 = val2 |
82 | br.cond.dptk .l2 |
/* Final length = len (bytes counted by the byte loop) + pos0
   + (str - origadd) - 16.  The -16 compensates for str being 16 bytes past
   the word in val1: that word's own load and the read-ahead load each
   post-incremented str by 8.  */
83 | .foundit: |
84 | sub tmp = str, origadd // tmp = crt address - orig |
85 | add len = len, pos0;; |
86 | add len = len, tmp;; |
87 | adds len = -16, len |
/* Common exit: restore ar.lc and return through b0; the result is in len
   (ret0).  */
88 | .restore_and_exit: |
89 | mov ar.lc = saved_lc // restore the loop counter |
90 | br.ret.sptk.many b0 |
/* Reached from chk.s when the speculative load of val2 has to be repeated.
   Step str back over that word, reload it non-speculatively (a genuine
   fault is taken here; the post-increment re-advances str), then resume at
   .back.  */
91 | .recovery: |
92 | adds str = -8, str;; |
93 | ld8 val2 = [str], 8 // bomb out here |
94 | br.cond.sptk .back |
95 | END(strlen) |
/* libc_hidden_builtin_def is defined outside this file; presumably it sets
   up the hidden internal alias for strlen - confirm in the libc symbol
   headers.  */
96 | libc_hidden_builtin_def (strlen) |
97 | |