1 | /* Optimized strncmp implementation for PowerPC476. |
2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library. If not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | |
21 | /* strncmp |
22 | |
23 | Register Use |
24 | r0:temp return equality |
25 | r3:source1 address, return equality |
26 | r4:source2 address |
27 | r5:byte count |
28 | |
29 | Implementation description |
30 | Run the 8-byte loop only while both sources stay short of their next 4 KB boundary. |
31 | If (-address & 0xfff) < 8 for either source, or count < 8, skip straight to the byte loop. |
32 | Check 2 words from src1 and src2. If unequal jump to end and |
33 | return src1 > src2 or src1 < src2. |
34 | If null check bytes before null and then jump to end and |
35 | return src1 > src2, src1 < src2 or src1 = src2. |
36 | If count = zero check bytes before zero counter and then jump to end and |
37 | return src1 > src2, src1 < src2 or src1 = src2. |
38 | If src1 = src2 and no null, repeat. */ |
39 | |
40 | EALIGN (strncmp,5,0) /* In: r3 = src1, r4 = src2, r5 = max bytes. Out: r3 = 0 if equal, else the CR image of the deciding unsigned compare. EALIGN comes from sysdep.h (not shown here). */ |
41 | neg r7,r3 /* r7 = -src1 */ |
42 | clrlwi r7,r7,20 /* keep low 12 bits: bytes from src1 to its next 4096-byte boundary (0 if already on one) */ |
43 | neg r8,r4 /* r8 = -src2 */ |
44 | clrlwi r8,r8,20 /* same distance for src2 */ |
45 | srwi. r7,r7,3 /* r7 = whole 8-byte groups before src1's boundary; record form sets CR0 */ |
46 | beq L(prebyte_count_loop) /* none available: compare byte by byte */ |
47 | srwi. r8,r8,3 /* r8 = whole 8-byte groups before src2's boundary */ |
48 | beq L(prebyte_count_loop) /* none available: compare byte by byte */ |
49 | cmplw r7,r8 /* unsigned compare of the two limits */ |
50 | mtctr r7 /* tentatively ctr = src1 limit */ |
51 | ble L(preword2_count_loop) /* keep it when r7 <= r8 */ |
52 | mtctr r8 /* otherwise ctr = src2 limit, so ctr = min (r7, r8) */ |
53 | |
54 | L(preword2_count_loop): /* Cap the group count by the caller's byte count. */ |
55 | srwi. r6,r5,3 /* r6 = count / 8 */ |
56 | beq L(prebyte_count_loop) /* fewer than 8 bytes requested: byte loop only */ |
57 | mfctr r7 /* r7 = boundary-limited group count */ |
58 | cmplw r6,r7 |
59 | bgt L(set_count_loop) /* count allows more groups than the boundary does */ |
60 | mtctr r6 /* count is the tighter limit: ctr = count / 8 */ |
61 | clrlwi r5,r5,29 /* r5 = count % 8, left over for the byte loop */ |
62 | |
63 | L(word2_count_loop): /* Compare 8 bytes per iteration, ctr iterations. */ |
64 | lwz r10,0(r3) /* r10 = src1 word 0 */ |
65 | lwz r6,4(r3) /* r6 = src1 word 1 */ |
66 | addi r3,r3,0x08 /* advance src1 */ |
67 | lwz r8,0(r4) /* r8 = src2 word 0 */ |
68 | lwz r9,4(r4) /* r9 = src2 word 1 */ |
69 | addi r4,r4,0x08 /* advance src2 */ |
70 | dlmzb. r12,r10,r6 /* r12 = 1-based index of the leftmost zero byte in r10:r6; CR0 is EQ if none, GT if in r10, LT if in r6 (per the dlmzb definition) */ |
71 | bne L(end_check) /* src1 holds a NUL in these 8 bytes */ |
72 | cmplw r10,r8 /* first words */ |
73 | bne L(st1) |
74 | cmplw r6,r9 /* second words */ |
75 | bne L(st1) |
76 | bdnz L(word2_count_loop) /* loop while --ctr != 0 */ |
77 | |
78 | L(prebyte_count_loop): /* Set up the byte loop for the r5 bytes still to compare. */ |
79 | addi r5,r5,1 /* one extra because bdz below decrements before testing */ |
80 | mtctr r5 |
81 | bdz L(end_strncmp) /* nothing left to compare: strings are equal */ |
82 | |
83 | L(byte_count_loop): /* Compare one byte per iteration. */ |
84 | lbz r6,0(r3) /* r6 = byte from src1, zero-extended */ |
85 | addi r3,r3,1 |
86 | lbz r7,0(r4) /* r7 = byte from src2, zero-extended */ |
87 | addi r4,r4,1 |
88 | cmplw r6,r7 |
89 | bne L(st1) /* bytes differ */ |
90 | cmpwi r6,0 |
91 | beq L(end_strncmp) /* equal and NUL: both strings ended together */ |
92 | bdnz L(byte_count_loop) |
93 | b L(end_strncmp) /* count exhausted with no difference */ |
94 | |
95 | L(set_count_loop): /* Boundary limit applies; ctr already holds it. */ |
96 | slwi r7,r7,3 /* r7 = bytes the word loop will cover */ |
97 | subf r5,r7,r5 /* r5 = count - r7, handled afterwards by the byte loop */ |
98 | b L(word2_count_loop) |
99 | |
100 | L(end_check): /* A NUL was found; compare only the bytes up to and including it. */ |
101 | subfic r12,r12,4 /* r12 = 4 - index; subfic leaves CR0 untouched */ |
102 | blt L(end_check2) /* CR0 still from dlmzb: LT means the NUL is in the second word */ |
103 | rlwinm r12,r12,3,0,31 /* r12 *= 8: bit count of the bytes after the NUL */ |
104 | srw r10,r10,r12 /* drop the low-order bytes, which follow the NUL in big-endian order */ |
105 | srw r8,r8,r12 /* same for src2 */ |
106 | cmplw r10,r8 |
107 | bne L(st1) |
108 | b L(end_strncmp) /* equal through the NUL */ |
109 | |
110 | L(end_check2): /* NUL is in the second word; r12 holds 4 - index (negative). */ |
111 | addi r12,r12,4 /* r12 = 8 - index: bytes after the NUL in the second word */ |
112 | cmplw r10,r8 /* first words must match in full */ |
113 | rlwinm r12,r12,3,0,31 /* r12 *= 8; no record bit, so CR0 keeps the cmplw result */ |
114 | bne L(st1) /* tests the cmplw above */ |
115 | srw r6,r6,r12 /* drop the bytes after the NUL */ |
116 | srw r9,r9,r12 /* same for src2 */ |
117 | cmplw r6,r9 |
118 | bne L(st1) /* otherwise fall through: equal */ |
119 | |
120 | L(end_strncmp): /* Strings compare equal. */ |
121 | addi r3,r0,0 /* r3 = 0; in addi an RA field of 0 means the literal zero, not the contents of GPR0 */ |
122 | blr |
123 | |
124 | L(st1): /* Strings differ; CR0 holds the result of the last cmplw. */ |
125 | mfcr r3 /* CR0 is the top nibble: LT lands in the sign bit (negative result), GT gives 0x40000000 or more (positive) */ |
126 | blr |
127 | END (strncmp) /* END is defined outside this file */ |
128 | libc_hidden_builtin_def (strncmp) /* macro defined outside this file; presumably sets up the hidden internal alias (confirm in the headers) */ |
129 | |