Warning: That file was not part of the compilation database. It may have many parsing errors.

1 | /* Copyright (C) 1996-2019 Free Software Foundation, Inc. |
---|---|

2 | Contributed by Richard Henderson (rth@tamu.edu) |

3 | This file is part of the GNU C Library. |

4 | |

5 | The GNU C Library is free software; you can redistribute it and/or |

6 | modify it under the terms of the GNU Lesser General Public |

7 | License as published by the Free Software Foundation; either |

8 | version 2.1 of the License, or (at your option) any later version. |

9 | |

10 | The GNU C Library is distributed in the hope that it will be useful, |

11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

13 | Lesser General Public License for more details. |

14 | |

15 | You should have received a copy of the GNU Lesser General Public |

16 | License along with the GNU C Library. If not, see |

17 | <http://www.gnu.org/licenses/>. */ |

18 | |

19 | /* Bytewise compare at most N bytes of two null-terminated strings. */ |

20 | |

21 | #include <sysdep.h> |

22 | |

23 | .set noat |

24 | .set noreorder |

25 | |

26 | /* EV6 only predicts one branch per octaword. We'll use these to push |

27 | subsequent branches back to the next bundle. This will generally add |

28 | a fetch+decode cycle to older machines, so skip in that case. */ |

29 | #ifdef __alpha_fix__ |

30 | # define ev6_unop unop |

31 | #else |

32 | # define ev6_unop |

33 | #endif |

34 | |

35 | .text |

36 | |

37 | ENTRY(strncmp) |

38 | #ifdef PROF |

39 | ldgp gp, 0(pv) |

40 | lda AT, _mcount |

41 | jsr AT, (AT), _mcount |

42 | .prologue 1 |

43 | #else |

44 | .prologue 0 |

45 | #endif |

46 | |

47 | xor a0, a1, t2 # are s1 and s2 co-aligned? |

48 | beq a2, $zerolength |

49 | ldq_u t0, 0(a0) # load asap to give cache time to catch up |

50 | ldq_u t1, 0(a1) |

51 | lda t3, -1 |

52 | and t2, 7, t2 |

53 | srl t3, 1, t6 |

54 | and a0, 7, t4 # find s1 misalignment |

55 | and a1, 7, t5 # find s2 misalignment |

56 | cmovlt a2, t6, a2 # bound neg count to LONG_MAX |

57 | addq a1, a2, a3 # s2+count |

58 | addq a2, t4, a2 # bias count by s1 misalignment |

59 | and a2, 7, t10 # ofs of last byte in s1 last word |

60 | srl a2, 3, a2 # remaining full words in s1 count |

61 | bne t2, $unaligned |

62 | |

63 | /* On entry to this basic block: |

64 | t0 == the first word of s1. |

65 | t1 == the first word of s2. |

66 | t3 == -1. */ |

67 | $aligned: |

68 | mskqh t3, a1, t8 # mask off leading garbage |

69 | ornot t1, t8, t1 |

70 | ornot t0, t8, t0 |

71 | cmpbge zero, t1, t7 # bits set iff null found |

72 | beq a2, $eoc # check end of count |

73 | bne t7, $eos |

74 | beq t10, $ant_loop |

75 | |

76 | /* Aligned compare main loop. |

77 | On entry to this basic block: |

78 | t0 == an s1 word. |

79 | t1 == an s2 word not containing a null. */ |

80 | |

81 | .align 4 |

82 | $a_loop: |

83 | xor t0, t1, t2 # e0 : |

84 | bne t2, $wordcmp # .. e1 (zdb) |

85 | ldq_u t1, 8(a1) # e0 : |

86 | ldq_u t0, 8(a0) # .. e1 : |

87 | |

88 | subq a2, 1, a2 # e0 : |

89 | addq a1, 8, a1 # .. e1 : |

90 | addq a0, 8, a0 # e0 : |

91 | beq a2, $eoc # .. e1 : |

92 | |

93 | cmpbge zero, t1, t7 # e0 : |

94 | beq t7, $a_loop # .. e1 : |

95 | |

96 | br $eos |

97 | |

98 | /* Alternate aligned compare loop, for when there's no trailing |

99 | bytes on the count. We have to avoid reading too much data. */ |

100 | .align 4 |

101 | $ant_loop: |

102 | xor t0, t1, t2 # e0 : |

103 | ev6_unop |

104 | ev6_unop |

105 | bne t2, $wordcmp # .. e1 (zdb) |

106 | |

107 | subq a2, 1, a2 # e0 : |

108 | beq a2, $zerolength # .. e1 : |

109 | ldq_u t1, 8(a1) # e0 : |

110 | ldq_u t0, 8(a0) # .. e1 : |

111 | |

112 | addq a1, 8, a1 # e0 : |

113 | addq a0, 8, a0 # .. e1 : |

114 | cmpbge zero, t1, t7 # e0 : |

115 | beq t7, $ant_loop # .. e1 : |

116 | |

117 | br $eos |

118 | |

119 | /* The two strings are not co-aligned. Align s1 and cope. */ |

120 | /* On entry to this basic block: |

121 | t0 == the first word of s1. |

122 | t1 == the first word of s2. |

123 | t3 == -1. |

124 | t4 == misalignment of s1. |

125 | t5 == misalignment of s2. |

126 | t10 == misalignment of s1 end. */ |

127 | .align 4 |

128 | $unaligned: |

129 | /* If s1 misalignment is larger than s2 misalignment, we need |

130 | extra startup checks to avoid SEGV. */ |

131 | subq a1, t4, a1 # adjust s2 for s1 misalignment |

132 | cmpult t4, t5, t9 |

133 | subq a3, 1, a3 # last byte of s2 |

134 | bic a1, 7, t8 |

135 | mskqh t3, t5, t7 # mask garbage in s2 |

136 | subq a3, t8, a3 |

137 | ornot t1, t7, t7 |

138 | srl a3, 3, a3 # remaining full words in s2 count |

139 | beq t9, $u_head |

140 | |

141 | /* Failing that, we need to look for both eos and eoc within the |

142 | first word of s2. If we find either, we can continue by |

143 | pretending that the next word of s2 is all zeros. */ |

144 | lda t2, 0 # next = zero |

145 | cmpeq a3, 0, t8 # eoc in the first word of s2? |

146 | cmpbge zero, t7, t7 # eos in the first word of s2? |

147 | or t7, t8, t8 |

148 | bne t8, $u_head_nl |

149 | |

150 | /* We know just enough now to be able to assemble the first |

151 | full word of s2. We can still find a zero at the end of it. |

152 | |

153 | On entry to this basic block: |

154 | t0 == first word of s1 |

155 | t1 == first partial word of s2. |

156 | t3 == -1. |

157 | t10 == ofs of last byte in s1 last word. |

158 | t11 == ofs of last byte in s2 last word. */ |

159 | $u_head: |

160 | ldq_u t2, 8(a1) # load second partial s2 word |

161 | subq a3, 1, a3 |

162 | $u_head_nl: |

163 | extql t1, a1, t1 # create first s2 word |

164 | mskqh t3, a0, t8 |

165 | extqh t2, a1, t4 |

166 | ornot t0, t8, t0 # kill s1 garbage |

167 | or t1, t4, t1 # s2 word now complete |

168 | cmpbge zero, t0, t7 # find eos in first s1 word |

169 | ornot t1, t8, t1 # kill s2 garbage |

170 | beq a2, $eoc |

171 | subq a2, 1, a2 |

172 | bne t7, $eos |

173 | mskql t3, a1, t8 # mask out s2[1] bits we have seen |

174 | xor t0, t1, t4 # compare aligned words |

175 | or t2, t8, t8 |

176 | bne t4, $wordcmp |

177 | cmpbge zero, t8, t7 # eos in high bits of s2[1]? |

178 | cmpeq a3, 0, t8 # eoc in s2[1]? |

179 | or t7, t8, t7 |

180 | bne t7, $u_final |

181 | |

182 | /* Unaligned copy main loop. In order to avoid reading too much, |

183 | the loop is structured to detect zeros in aligned words from s2. |

184 | This has, unfortunately, effectively pulled half of a loop |

185 | iteration out into the head and half into the tail, but it does |

186 | prevent nastiness from accumulating in the very thing we want |

187 | to run as fast as possible. |

188 | |

189 | On entry to this basic block: |

190 | t2 == the unshifted low-bits from the next s2 word. |

191 | t10 == ofs of last byte in s1 last word. |

192 | t11 == ofs of last byte in s2 last word. */ |

193 | .align 4 |

194 | $u_loop: |

195 | extql t2, a1, t3 # e0 : |

196 | ldq_u t2, 16(a1) # .. e1 : load next s2 high bits |

197 | ldq_u t0, 8(a0) # e0 : load next s1 word |

198 | addq a1, 8, a1 # .. e1 : |

199 | |

200 | addq a0, 8, a0 # e0 : |

201 | subq a3, 1, a3 # .. e1 : |

202 | extqh t2, a1, t1 # e0 : |

203 | cmpbge zero, t0, t7 # .. e1 : eos in current s1 word |

204 | |

205 | or t1, t3, t1 # e0 : |

206 | beq a2, $eoc # .. e1 : eoc in current s1 word |

207 | subq a2, 1, a2 # e0 : |

208 | cmpbge zero, t2, t4 # .. e1 : eos in s2[1] |

209 | |

210 | xor t0, t1, t3 # e0 : compare the words |

211 | ev6_unop |

212 | ev6_unop |

213 | bne t7, $eos # .. e1 : |

214 | |

215 | cmpeq a3, 0, t5 # e0 : eoc in s2[1] |

216 | ev6_unop |

217 | ev6_unop |

218 | bne t3, $wordcmp # .. e1 : |

219 | |

220 | or t4, t5, t4 # e0 : eos or eoc in s2[1]. |

221 | beq t4, $u_loop # .. e1 (zdb) |

222 | |

223 | /* We've found a zero in the low bits of the last s2 word. Get |

224 | the next s1 word and align them. */ |

225 | .align 3 |

226 | $u_final: |

227 | ldq_u t0, 8(a0) |

228 | extql t2, a1, t1 |

229 | cmpbge zero, t1, t7 |

230 | bne a2, $eos |

231 | |

232 | /* We've hit end of count. Zero everything after the count |

233 | and compare whats left. */ |

234 | .align 3 |

235 | $eoc: |

236 | mskql t0, t10, t0 |

237 | mskql t1, t10, t1 |

238 | cmpbge zero, t1, t7 |

239 | |

240 | /* We've found a zero somewhere in a word we just read. |

241 | On entry to this basic block: |

242 | t0 == s1 word |

243 | t1 == s2 word |

244 | t7 == cmpbge mask containing the zero. */ |

245 | .align 3 |

246 | $eos: |

247 | negq t7, t6 # create bytemask of valid data |

248 | and t6, t7, t8 |

249 | subq t8, 1, t6 |

250 | or t6, t8, t7 |

251 | zapnot t0, t7, t0 # kill the garbage |

252 | zapnot t1, t7, t1 |

253 | xor t0, t1, v0 # ... and compare |

254 | beq v0, $done |

255 | |

256 | /* Here we have two differing co-aligned words in t0 & t1. |

257 | Bytewise compare them and return (t0 > t1 ? 1 : -1). */ |

258 | .align 3 |

259 | $wordcmp: |

260 | cmpbge t0, t1, t2 # comparison yields bit mask of ge |

261 | cmpbge t1, t0, t3 |

262 | xor t2, t3, t0 # bits set iff t0/t1 bytes differ |

263 | negq t0, t1 # clear all but least bit |

264 | and t0, t1, t0 |

265 | lda v0, -1 |

266 | and t0, t2, t1 # was bit set in t0 > t1? |

267 | cmovne t1, 1, v0 |

268 | $done: |

269 | ret |

270 | |

271 | .align 3 |

272 | $zerolength: |

273 | clr v0 |

274 | ret |

275 | |

276 | END(strncmp) |

277 | libc_hidden_builtin_def (strncmp) |

278 |

Warning: That file was not part of the compilation database. It may have many parsing errors.