1 | /* Optimized wcscmp for x86-64 with SSE2. |
---|---|

2 | Copyright (C) 2011-2019 Free Software Foundation, Inc. |

3 | Contributed by Intel Corporation. |

4 | This file is part of the GNU C Library. |

5 | |

6 | The GNU C Library is free software; you can redistribute it and/or |

7 | modify it under the terms of the GNU Lesser General Public |

8 | License as published by the Free Software Foundation; either |

9 | version 2.1 of the License, or (at your option) any later version. |

10 | |

11 | The GNU C Library is distributed in the hope that it will be useful, |

12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

14 | Lesser General Public License for more details. |

15 | |

16 | You should have received a copy of the GNU Lesser General Public |

17 | License along with the GNU C Library; if not, see |

18 | <http://www.gnu.org/licenses/>. */ |

19 | |

20 | #include <sysdep.h> |

21 | |

22 | /* Note: wcscmp uses signed comparison, not unsigned as in the strcmp function. */ |

23 | |

24 | .text |

25 | ENTRY (__wcscmp) |

26 | /* Compares the two strings addressed by %rdi and %rsi one 32-bit double_word at a time; the result is returned in %eax as 0, 1 or -1. |

27 | * This implementation uses SSE to compare up to 16 bytes at a time. |

28 | * The entry code classifies %rdi by its offset inside a 64-byte block and jumps to a dispatcher that does the same for %rsi. */ |

29 | mov %esi, %eax /* eax = low 32 bits of %rsi */ |

30 | mov %edi, %edx /* edx = low 32 bits of %rdi */ |

31 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ |

32 | mov %al, %ch /* ch = low byte of %rsi; the dispatchers mask it with 15 */ |

33 | mov %dl, %cl /* cl = low byte of %rdi */ |

34 | and $63, %eax /* rsi alignment in cache line */ |

35 | and $63, %edx /* rdi alignment in cache line */ |

36 | and $15, %cl /* cl = %rdi & 15 */ |

37 | jz L(continue_00) /* %rdi is 16-byte aligned */ |

38 | cmp $16, %edx |

39 | jb L(continue_0) /* %rdi offset is 1..15 */ |

40 | cmp $32, %edx |

41 | jb L(continue_16) /* %rdi offset is 17..31 */ |

42 | cmp $48, %edx |

43 | jb L(continue_32) /* %rdi offset is 33..47 */ |

44 | /* Otherwise the %rdi offset is 49..63: fall through to L(continue_48). */ |

45 | L(continue_48): /* Dispatcher: %rdi is at offset 49..63 of its 64-byte block. Classify %rsi using eax = %rsi & 63 and ch = low byte of %rsi, both set by the entry code. */ |

46 | and $15, %ch /* ch = %rsi & 15 */ |

47 | jz L(continue_48_00) /* %rsi is 16-byte aligned */ |

48 | cmp $16, %eax |

49 | jb L(continue_0_48) /* %rsi offset is 1..15 */ |

50 | cmp $32, %eax |

51 | jb L(continue_16_48) /* %rsi offset is 17..31 */ |

52 | cmp $48, %eax |

53 | jb L(continue_32_48) /* %rsi offset is 33..47 */ |

54 | /* Otherwise the %rsi offset is 49..63 as well: fall through to L(continue_48_48). */ |

55 | .p2align 4 |

56 | L(continue_48_48): /* Loop, 64 bytes per iteration: both pointers are at offset 49..63 of a 64-byte block. The first 16 bytes are compared one double_word at a time, presumably because a 16-byte load at this position could extend past the end of the 64-byte block; the remaining 48 bytes use unaligned SSE loads. %xmm0 is all-zero whenever an SSE step starts: each null check overwrites it, but execution only continues when that result was all-zero. */ |

57 | mov (%rsi), %ecx |

58 | cmp %ecx, (%rdi) /* signed flags for (%rdi) - (%rsi); L(nequal) consumes them */ |

59 | jne L(nequal) |

60 | test %ecx, %ecx |

61 | jz L(equal) /* equal and null: both strings end here */ |

62 | |

63 | mov 4(%rsi), %ecx |

64 | cmp %ecx, 4(%rdi) |

65 | jne L(nequal) |

66 | test %ecx, %ecx |

67 | jz L(equal) |

68 | |

69 | mov 8(%rsi), %ecx |

70 | cmp %ecx, 8(%rdi) |

71 | jne L(nequal) |

72 | test %ecx, %ecx |

73 | jz L(equal) |

74 | |

75 | mov 12(%rsi), %ecx |

76 | cmp %ecx, 12(%rdi) |

77 | jne L(nequal) |

78 | test %ecx, %ecx |

79 | jz L(equal) |

80 | |

81 | movdqu 16(%rdi), %xmm1 |

82 | movdqu 16(%rsi), %xmm2 |

83 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

84 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

85 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

86 | pmovmskb %xmm1, %edx |

87 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

88 | jnz L(less4_double_words_16) |

89 | |

90 | movdqu 32(%rdi), %xmm1 |

91 | movdqu 32(%rsi), %xmm2 |

92 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

93 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

94 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

95 | pmovmskb %xmm1, %edx |

96 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

97 | jnz L(less4_double_words_32) |

98 | |

99 | movdqu 48(%rdi), %xmm1 |

100 | movdqu 48(%rsi), %xmm2 |

101 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

102 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

103 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

104 | pmovmskb %xmm1, %edx |

105 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

106 | jnz L(less4_double_words_48) |

107 | |

108 | add $64, %rsi /* advancing both pointers by 64 keeps their block offsets unchanged */ |

109 | add $64, %rdi |

110 | jmp L(continue_48_48) |

111 | |

112 | L(continue_0): /* Dispatcher: %rdi is at offset 1..15 of its 64-byte block. Classify %rsi using eax = %rsi & 63 and ch = low byte of %rsi, both set by the entry code. */ |

113 | and $15, %ch /* ch = %rsi & 15 */ |

114 | jz L(continue_0_00) /* %rsi is 16-byte aligned */ |

115 | cmp $16, %eax |

116 | jb L(continue_0_0) /* %rsi offset is 1..15 */ |

117 | cmp $32, %eax |

118 | jb L(continue_0_16) /* %rsi offset is 17..31 */ |

119 | cmp $48, %eax |

120 | jb L(continue_0_32) /* %rsi offset is 33..47 */ |

121 | /* Otherwise the %rsi offset is 49..63: fall through to L(continue_0_48). */ |

122 | .p2align 4 |

123 | L(continue_0_48): /* Loop, 64 bytes per iteration: one pointer is at offset 1..15 of a 64-byte block and the other at 49..63 (either order). Bytes 0..15 and 48..63 are compared one double_word at a time, presumably because a 16-byte load there could extend past the end of a 64-byte block for one of the pointers; bytes 16..47 use unaligned SSE loads. */ |

124 | mov (%rsi), %ecx |

125 | cmp %ecx, (%rdi) /* signed flags for (%rdi) - (%rsi); L(nequal) consumes them */ |

126 | jne L(nequal) |

127 | test %ecx, %ecx |

128 | jz L(equal) /* equal and null: both strings end here */ |

129 | |

130 | mov 4(%rsi), %ecx |

131 | cmp %ecx, 4(%rdi) |

132 | jne L(nequal) |

133 | test %ecx, %ecx |

134 | jz L(equal) |

135 | |

136 | mov 8(%rsi), %ecx |

137 | cmp %ecx, 8(%rdi) |

138 | jne L(nequal) |

139 | test %ecx, %ecx |

140 | jz L(equal) |

141 | |

142 | mov 12(%rsi), %ecx |

143 | cmp %ecx, 12(%rdi) |

144 | jne L(nequal) |

145 | test %ecx, %ecx |

146 | jz L(equal) |

147 | |

148 | movdqu 16(%rdi), %xmm1 |

149 | movdqu 16(%rsi), %xmm2 |

150 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

151 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

152 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

153 | pmovmskb %xmm1, %edx |

154 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

155 | jnz L(less4_double_words_16) |

156 | |

157 | movdqu 32(%rdi), %xmm1 |

158 | movdqu 32(%rsi), %xmm2 |

159 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

160 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

161 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

162 | pmovmskb %xmm1, %edx |

163 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

164 | jnz L(less4_double_words_32) |

165 | |

166 | mov 48(%rsi), %ecx /* bytes 48..63: back to one double_word at a time */ |

167 | cmp %ecx, 48(%rdi) |

168 | jne L(nequal) |

169 | test %ecx, %ecx |

170 | jz L(equal) |

171 | |

172 | mov 52(%rsi), %ecx |

173 | cmp %ecx, 52(%rdi) |

174 | jne L(nequal) |

175 | test %ecx, %ecx |

176 | jz L(equal) |

177 | |

178 | mov 56(%rsi), %ecx |

179 | cmp %ecx, 56(%rdi) |

180 | jne L(nequal) |

181 | test %ecx, %ecx |

182 | jz L(equal) |

183 | |

184 | mov 60(%rsi), %ecx |

185 | cmp %ecx, 60(%rdi) |

186 | jne L(nequal) |

187 | test %ecx, %ecx |

188 | jz L(equal) |

189 | |

190 | add $64, %rsi /* advancing both pointers by 64 keeps their block offsets unchanged */ |

191 | add $64, %rdi |

192 | jmp L(continue_0_48) |

193 | |

194 | .p2align 4 |

195 | L(continue_00): /* Dispatcher: %rdi is 16-byte aligned. Classify %rsi using eax = %rsi & 63 and ch = low byte of %rsi, both set by the entry code. */ |

196 | and $15, %ch /* ch = %rsi & 15 */ |

197 | jz L(continue_00_00) /* %rsi is 16-byte aligned too */ |

198 | cmp $16, %eax |

199 | jb L(continue_00_0) /* %rsi offset is 1..15 */ |

200 | cmp $32, %eax |

201 | jb L(continue_00_16) /* %rsi offset is 17..31 */ |

202 | cmp $48, %eax |

203 | jb L(continue_00_32) /* %rsi offset is 33..47 */ |

204 | /* Otherwise the %rsi offset is 49..63: fall through to L(continue_00_48). */ |

205 | .p2align 4 |

206 | L(continue_00_48): /* Loop, 64 bytes per iteration: %rdi is 16-byte aligned and %rsi is at offset 49..63 of a 64-byte block. The first 16 bytes are null-checked with one aligned SSE compare on the %rdi side and then compared one double_word at a time; the remaining 48 bytes use an unaligned load from %rsi and an aligned memory operand from %rdi. */ |

207 | pcmpeqd (%rdi), %xmm0 /* xmm0 (zero on entry) = null mask of the 4 double_words at %rdi */ |

208 | mov (%rdi), %eax /* eax = first double_word at %rdi, also used by L(less4_double_words1) */ |

209 | pmovmskb %xmm0, %ecx |

210 | test %ecx, %ecx |

211 | jnz L(less4_double_words1) /* a null is present: finish with null-checking scalar compares */ |

212 | |

213 | cmp (%rsi), %eax /* no null at %rdi in these 16 bytes, so equality alone decides */ |

214 | jne L(nequal) |

215 | |

216 | mov 4(%rdi), %eax |

217 | cmp 4(%rsi), %eax |

218 | jne L(nequal) |

219 | |

220 | mov 8(%rdi), %eax |

221 | cmp 8(%rsi), %eax |

222 | jne L(nequal) |

223 | |

224 | mov 12(%rdi), %eax |

225 | cmp 12(%rsi), %eax |

226 | jne L(nequal) |

227 | |

228 | movdqu 16(%rsi), %xmm2 |

229 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

230 | pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ |

231 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

232 | pmovmskb %xmm2, %edx |

233 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

234 | jnz L(less4_double_words_16) |

235 | |

236 | movdqu 32(%rsi), %xmm2 |

237 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

238 | pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ |

239 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

240 | pmovmskb %xmm2, %edx |

241 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

242 | jnz L(less4_double_words_32) |

243 | |

244 | movdqu 48(%rsi), %xmm2 |

245 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

246 | pcmpeqd 48(%rdi), %xmm2 /* compare first 4 double_words for equality */ |

247 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

248 | pmovmskb %xmm2, %edx |

249 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

250 | jnz L(less4_double_words_48) |

251 | |

252 | add $64, %rsi /* advancing both pointers by 64 keeps their block offsets unchanged */ |

253 | add $64, %rdi |

254 | jmp L(continue_00_48) |

255 | |

256 | .p2align 4 |

257 | L(continue_32): /* Dispatcher: %rdi is at offset 33..47 of its 64-byte block. Classify %rsi using eax = %rsi & 63 and ch = low byte of %rsi, both set by the entry code. */ |

258 | and $15, %ch /* ch = %rsi & 15 */ |

259 | jz L(continue_32_00) /* %rsi is 16-byte aligned */ |

260 | cmp $16, %eax |

261 | jb L(continue_0_32) /* %rsi offset is 1..15 */ |

262 | cmp $32, %eax |

263 | jb L(continue_16_32) /* %rsi offset is 17..31 */ |

264 | cmp $48, %eax |

265 | jb L(continue_32_32) /* %rsi offset is 33..47 */ |

266 | /* Otherwise the %rsi offset is 49..63: fall through to L(continue_32_48). */ |

267 | .p2align 4 |

268 | L(continue_32_48): /* Loop, 64 bytes per iteration: one pointer is at offset 33..47 of a 64-byte block and the other at 49..63 (either order). Bytes 0..31 are compared one double_word at a time, presumably because a 16-byte load there could extend past the end of a 64-byte block for one of the pointers; bytes 32..63 use unaligned SSE loads. */ |

269 | mov (%rsi), %ecx |

270 | cmp %ecx, (%rdi) /* signed flags for (%rdi) - (%rsi); L(nequal) consumes them */ |

271 | jne L(nequal) |

272 | test %ecx, %ecx |

273 | jz L(equal) /* equal and null: both strings end here */ |

274 | |

275 | mov 4(%rsi), %ecx |

276 | cmp %ecx, 4(%rdi) |

277 | jne L(nequal) |

278 | test %ecx, %ecx |

279 | jz L(equal) |

280 | |

281 | mov 8(%rsi), %ecx |

282 | cmp %ecx, 8(%rdi) |

283 | jne L(nequal) |

284 | test %ecx, %ecx |

285 | jz L(equal) |

286 | |

287 | mov 12(%rsi), %ecx |

288 | cmp %ecx, 12(%rdi) |

289 | jne L(nequal) |

290 | test %ecx, %ecx |

291 | jz L(equal) |

292 | |

293 | mov 16(%rsi), %ecx |

294 | cmp %ecx, 16(%rdi) |

295 | jne L(nequal) |

296 | test %ecx, %ecx |

297 | jz L(equal) |

298 | |

299 | mov 20(%rsi), %ecx |

300 | cmp %ecx, 20(%rdi) |

301 | jne L(nequal) |

302 | test %ecx, %ecx |

303 | jz L(equal) |

304 | |

305 | mov 24(%rsi), %ecx |

306 | cmp %ecx, 24(%rdi) |

307 | jne L(nequal) |

308 | test %ecx, %ecx |

309 | jz L(equal) |

310 | |

311 | mov 28(%rsi), %ecx |

312 | cmp %ecx, 28(%rdi) |

313 | jne L(nequal) |

314 | test %ecx, %ecx |

315 | jz L(equal) |

316 | |

317 | movdqu 32(%rdi), %xmm1 |

318 | movdqu 32(%rsi), %xmm2 |

319 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

320 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

321 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

322 | pmovmskb %xmm1, %edx |

323 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

324 | jnz L(less4_double_words_32) |

325 | |

326 | movdqu 48(%rdi), %xmm1 |

327 | movdqu 48(%rsi), %xmm2 |

328 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

329 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

330 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

331 | pmovmskb %xmm1, %edx |

332 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

333 | jnz L(less4_double_words_48) |

334 | |

335 | add $64, %rsi /* advancing both pointers by 64 keeps their block offsets unchanged */ |

336 | add $64, %rdi |

337 | jmp L(continue_32_48) |

338 | |

339 | .p2align 4 |

340 | L(continue_16): /* Dispatcher: %rdi is at offset 17..31 of its 64-byte block. Classify %rsi using eax = %rsi & 63 and ch = low byte of %rsi, both set by the entry code. */ |

341 | and $15, %ch /* ch = %rsi & 15 */ |

342 | jz L(continue_16_00) /* %rsi is 16-byte aligned */ |

343 | cmp $16, %eax |

344 | jb L(continue_0_16) /* %rsi offset is 1..15 */ |

345 | cmp $32, %eax |

346 | jb L(continue_16_16) /* %rsi offset is 17..31 */ |

347 | cmp $48, %eax |

348 | jb L(continue_16_32) /* %rsi offset is 33..47 */ |

349 | /* Otherwise the %rsi offset is 49..63: fall through to L(continue_16_48). */ |

350 | .p2align 4 |

351 | L(continue_16_48): /* Loop, 64 bytes per iteration: one pointer is at offset 17..31 of a 64-byte block and the other at 49..63 (either order). Bytes 0..15 and 32..47 are compared one double_word at a time, presumably because a 16-byte load there could extend past the end of a 64-byte block for one of the pointers; bytes 16..31 and 48..63 use unaligned SSE loads. */ |

352 | mov (%rsi), %ecx |

353 | cmp %ecx, (%rdi) /* signed flags for (%rdi) - (%rsi); L(nequal) consumes them */ |

354 | jne L(nequal) |

355 | test %ecx, %ecx |

356 | jz L(equal) /* equal and null: both strings end here */ |

357 | |

358 | mov 4(%rsi), %ecx |

359 | cmp %ecx, 4(%rdi) |

360 | jne L(nequal) |

361 | test %ecx, %ecx |

362 | jz L(equal) |

363 | |

364 | mov 8(%rsi), %ecx |

365 | cmp %ecx, 8(%rdi) |

366 | jne L(nequal) |

367 | test %ecx, %ecx |

368 | jz L(equal) |

369 | |

370 | mov 12(%rsi), %ecx |

371 | cmp %ecx, 12(%rdi) |

372 | jne L(nequal) |

373 | test %ecx, %ecx |

374 | jz L(equal) |

375 | |

376 | movdqu 16(%rdi), %xmm1 |

377 | movdqu 16(%rsi), %xmm2 |

378 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

379 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

380 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

381 | pmovmskb %xmm1, %edx |

382 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

383 | jnz L(less4_double_words_16) |

384 | |

385 | mov 32(%rsi), %ecx /* bytes 32..47: back to one double_word at a time */ |

386 | cmp %ecx, 32(%rdi) |

387 | jne L(nequal) |

388 | test %ecx, %ecx |

389 | jz L(equal) |

390 | |

391 | mov 36(%rsi), %ecx |

392 | cmp %ecx, 36(%rdi) |

393 | jne L(nequal) |

394 | test %ecx, %ecx |

395 | jz L(equal) |

396 | |

397 | mov 40(%rsi), %ecx |

398 | cmp %ecx, 40(%rdi) |

399 | jne L(nequal) |

400 | test %ecx, %ecx |

401 | jz L(equal) |

402 | |

403 | mov 44(%rsi), %ecx |

404 | cmp %ecx, 44(%rdi) |

405 | jne L(nequal) |

406 | test %ecx, %ecx |

407 | jz L(equal) |

408 | |

409 | movdqu 48(%rdi), %xmm1 |

410 | movdqu 48(%rsi), %xmm2 |

411 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

412 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

413 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

414 | pmovmskb %xmm1, %edx |

415 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

416 | jnz L(less4_double_words_48) |

417 | |

418 | add $64, %rsi /* advancing both pointers by 64 keeps their block offsets unchanged */ |

419 | add $64, %rdi |

420 | jmp L(continue_16_48) |

421 | |

422 | .p2align 4 |

423 | L(continue_00_00): /* Loop, 64 bytes per iteration: both pointers are 16-byte aligned, so every step uses an aligned load from %rdi and an aligned memory operand from %rsi. Each step jumps to the matching L(less4_double_words*) handler as soon as a mismatch or a null is seen. */ |

424 | movdqa (%rdi), %xmm1 |

425 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

426 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ |

427 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

428 | pmovmskb %xmm1, %edx |

429 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

430 | jnz L(less4_double_words) |

431 | |

432 | movdqa 16(%rdi), %xmm3 |

433 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ |

434 | pcmpeqd 16(%rsi), %xmm3 /* compare first 4 double_words for equality */ |

435 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ |

436 | pmovmskb %xmm3, %edx |

437 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

438 | jnz L(less4_double_words_16) |

439 | |

440 | movdqa 32(%rdi), %xmm5 |

441 | pcmpeqd %xmm5, %xmm0 /* Any null double_word? */ |

442 | pcmpeqd 32(%rsi), %xmm5 /* compare first 4 double_words for equality */ |

443 | psubb %xmm0, %xmm5 /* packed sub of comparison results*/ |

444 | pmovmskb %xmm5, %edx |

445 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

446 | jnz L(less4_double_words_32) |

447 | |

448 | movdqa 48(%rdi), %xmm1 |

449 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

450 | pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

451 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

452 | pmovmskb %xmm1, %edx |

453 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

454 | jnz L(less4_double_words_48) |

455 | |

456 | add $64, %rsi |

457 | add $64, %rdi |

458 | jmp L(continue_00_00) |

459 | |

460 | .p2align 4 |

461 | L(continue_00_32): /* Lead-in: %rdi is 16-byte aligned and %rsi is at offset 33..47 of a 64-byte block. Compare one 16-byte chunk with SSE, then advance both pointers by 16 so that %rsi is at offset 49..63 and continue in L(continue_00_48). */ |

462 | movdqu (%rsi), %xmm2 |

463 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

464 | pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ |

465 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

466 | pmovmskb %xmm2, %edx |

467 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

468 | jnz L(less4_double_words) |

469 | |

470 | add $16, %rsi |

471 | add $16, %rdi |

472 | jmp L(continue_00_48) |

473 | |

474 | .p2align 4 |

475 | L(continue_00_16): /* Lead-in: %rdi is 16-byte aligned and %rsi is at offset 17..31 of a 64-byte block. Compare two 16-byte chunks with SSE, then advance both pointers by 32 so that %rsi is at offset 49..63 and continue in L(continue_00_48). */ |

476 | movdqu (%rsi), %xmm2 |

477 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

478 | pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ |

479 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

480 | pmovmskb %xmm2, %edx |

481 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

482 | jnz L(less4_double_words) |

483 | |

484 | movdqu 16(%rsi), %xmm2 |

485 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

486 | pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ |

487 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

488 | pmovmskb %xmm2, %edx |

489 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

490 | jnz L(less4_double_words_16) |

491 | |

492 | add $32, %rsi |

493 | add $32, %rdi |

494 | jmp L(continue_00_48) |

495 | |

496 | .p2align 4 |

497 | L(continue_00_0): /* Lead-in: %rdi is 16-byte aligned and %rsi is at offset 1..15 of a 64-byte block. Compare three 16-byte chunks with SSE, then advance both pointers by 48 so that %rsi is at offset 49..63 and continue in L(continue_00_48). */ |

498 | movdqu (%rsi), %xmm2 |

499 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

500 | pcmpeqd (%rdi), %xmm2 /* compare first 4 double_words for equality */ |

501 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

502 | pmovmskb %xmm2, %edx |

503 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

504 | jnz L(less4_double_words) |

505 | |

506 | movdqu 16(%rsi), %xmm2 |

507 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

508 | pcmpeqd 16(%rdi), %xmm2 /* compare first 4 double_words for equality */ |

509 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

510 | pmovmskb %xmm2, %edx |

511 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

512 | jnz L(less4_double_words_16) |

513 | |

514 | movdqu 32(%rsi), %xmm2 |

515 | pcmpeqd %xmm2, %xmm0 /* Any null double_word? */ |

516 | pcmpeqd 32(%rdi), %xmm2 /* compare first 4 double_words for equality */ |

517 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ |

518 | pmovmskb %xmm2, %edx |

519 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

520 | jnz L(less4_double_words_32) |

521 | |

522 | add $48, %rsi |

523 | add $48, %rdi |

524 | jmp L(continue_00_48) |

525 | |

526 | .p2align 4 |

527 | L(continue_48_00): /* Loop, 64 bytes per iteration: %rsi is 16-byte aligned and %rdi is at offset 49..63 of a 64-byte block. The first 16 bytes are null-checked with one aligned SSE compare on the %rsi side and then compared one double_word at a time; the remaining 48 bytes use an unaligned load from %rdi and an aligned memory operand from %rsi. */ |

528 | pcmpeqd (%rsi), %xmm0 /* xmm0 (zero on entry) = null mask of the 4 double_words at %rsi */ |

529 | mov (%rdi), %eax /* eax = first double_word at %rdi, also used by L(less4_double_words1) */ |

530 | pmovmskb %xmm0, %ecx |

531 | test %ecx, %ecx |

532 | jnz L(less4_double_words1) /* a null is present: finish with null-checking scalar compares */ |

533 | |

534 | cmp (%rsi), %eax /* no null at %rsi in these 16 bytes, so equality alone decides */ |

535 | jne L(nequal) |

536 | |

537 | mov 4(%rdi), %eax |

538 | cmp 4(%rsi), %eax |

539 | jne L(nequal) |

540 | |

541 | mov 8(%rdi), %eax |

542 | cmp 8(%rsi), %eax |

543 | jne L(nequal) |

544 | |

545 | mov 12(%rdi), %eax |

546 | cmp 12(%rsi), %eax |

547 | jne L(nequal) |

548 | |

549 | movdqu 16(%rdi), %xmm1 |

550 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

551 | pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

552 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

553 | pmovmskb %xmm1, %edx |

554 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

555 | jnz L(less4_double_words_16) |

556 | |

557 | movdqu 32(%rdi), %xmm1 |

558 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

559 | pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

560 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

561 | pmovmskb %xmm1, %edx |

562 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

563 | jnz L(less4_double_words_32) |

564 | |

565 | movdqu 48(%rdi), %xmm1 |

566 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

567 | pcmpeqd 48(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

568 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

569 | pmovmskb %xmm1, %edx |

570 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

571 | jnz L(less4_double_words_48) |

572 | |

573 | add $64, %rsi /* advancing both pointers by 64 keeps their block offsets unchanged */ |

574 | add $64, %rdi |

575 | jmp L(continue_48_00) |

576 | |

577 | .p2align 4 |

578 | L(continue_32_00): /* Lead-in: %rsi is 16-byte aligned and %rdi is at offset 33..47 of a 64-byte block. Compare one 16-byte chunk with SSE, then advance both pointers by 16 so that %rdi is at offset 49..63 and continue in L(continue_48_00). */ |

579 | movdqu (%rdi), %xmm1 |

580 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

581 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ |

582 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

583 | pmovmskb %xmm1, %edx |

584 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

585 | jnz L(less4_double_words) |

586 | |

587 | add $16, %rsi |

588 | add $16, %rdi |

589 | jmp L(continue_48_00) |

590 | |

591 | .p2align 4 |

592 | L(continue_16_00): /* Lead-in: %rsi is 16-byte aligned and %rdi is at offset 17..31 of a 64-byte block. Compare two 16-byte chunks with SSE, then advance both pointers by 32 so that %rdi is at offset 49..63 and continue in L(continue_48_00). */ |

593 | movdqu (%rdi), %xmm1 |

594 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

595 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ |

596 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

597 | pmovmskb %xmm1, %edx |

598 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

599 | jnz L(less4_double_words) |

600 | |

601 | movdqu 16(%rdi), %xmm1 |

602 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

603 | pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

604 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

605 | pmovmskb %xmm1, %edx |

606 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

607 | jnz L(less4_double_words_16) |

608 | |

609 | add $32, %rsi |

610 | add $32, %rdi |

611 | jmp L(continue_48_00) |

612 | |

613 | .p2align 4 |

614 | L(continue_0_00): /* Lead-in: %rsi is 16-byte aligned and %rdi is at offset 1..15 of a 64-byte block. Compare three 16-byte chunks with SSE, then advance both pointers by 48 so that %rdi is at offset 49..63 and continue in L(continue_48_00). */ |

615 | movdqu (%rdi), %xmm1 |

616 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

617 | pcmpeqd (%rsi), %xmm1 /* compare first 4 double_words for equality */ |

618 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

619 | pmovmskb %xmm1, %edx |

620 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

621 | jnz L(less4_double_words) |

622 | |

623 | movdqu 16(%rdi), %xmm1 |

624 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

625 | pcmpeqd 16(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

626 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

627 | pmovmskb %xmm1, %edx |

628 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

629 | jnz L(less4_double_words_16) |

630 | |

631 | movdqu 32(%rdi), %xmm1 |

632 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

633 | pcmpeqd 32(%rsi), %xmm1 /* compare first 4 double_words for equality */ |

634 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

635 | pmovmskb %xmm1, %edx |

636 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

637 | jnz L(less4_double_words_32) |

638 | |

639 | add $48, %rsi |

640 | add $48, %rdi |

641 | jmp L(continue_48_00) |

642 | |

643 | .p2align 4 |

644 | L(continue_32_32): /* Lead-in: both pointers are at offset 33..47 of a 64-byte block. Compare one 16-byte chunk with unaligned SSE loads, then advance both pointers by 16 so that both are at offset 49..63 and continue in L(continue_48_48). */ |

645 | movdqu (%rdi), %xmm1 |

646 | movdqu (%rsi), %xmm2 |

647 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

648 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

649 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

650 | pmovmskb %xmm1, %edx |

651 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

652 | jnz L(less4_double_words) |

653 | |

654 | add $16, %rsi |

655 | add $16, %rdi |

656 | jmp L(continue_48_48) |

657 | |

658 | .p2align 4 |

659 | L(continue_16_16): /* Lead-in: both pointers are at offset 17..31 of a 64-byte block. Compare two 16-byte chunks with unaligned SSE loads, then advance both pointers by 32 so that both are at offset 49..63 and continue in L(continue_48_48). */ |

660 | movdqu (%rdi), %xmm1 |

661 | movdqu (%rsi), %xmm2 |

662 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

663 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

664 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

665 | pmovmskb %xmm1, %edx |

666 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

667 | jnz L(less4_double_words) |

668 | |

669 | movdqu 16(%rdi), %xmm3 |

670 | movdqu 16(%rsi), %xmm4 |

671 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ |

672 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ |

673 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ |

674 | pmovmskb %xmm3, %edx |

675 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

676 | jnz L(less4_double_words_16) |

677 | |

678 | add $32, %rsi |

679 | add $32, %rdi |

680 | jmp L(continue_48_48) |

681 | |

682 | .p2align 4 |

683 | L(continue_0_0): /* Lead-in: both pointers are at offset 1..15 of a 64-byte block. Compare three 16-byte chunks with unaligned SSE loads, then advance both pointers by 48 so that both are at offset 49..63 and continue in L(continue_48_48). */ |

684 | movdqu (%rdi), %xmm1 |

685 | movdqu (%rsi), %xmm2 |

686 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

687 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

688 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

689 | pmovmskb %xmm1, %edx |

690 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

691 | jnz L(less4_double_words) |

692 | |

693 | movdqu 16(%rdi), %xmm3 |

694 | movdqu 16(%rsi), %xmm4 |

695 | pcmpeqd %xmm3, %xmm0 /* Any null double_word? */ |

696 | pcmpeqd %xmm4, %xmm3 /* compare first 4 double_words for equality */ |

697 | psubb %xmm0, %xmm3 /* packed sub of comparison results*/ |

698 | pmovmskb %xmm3, %edx |

699 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

700 | jnz L(less4_double_words_16) |

701 | |

702 | movdqu 32(%rdi), %xmm1 |

703 | movdqu 32(%rsi), %xmm2 |

704 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

705 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

706 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

707 | pmovmskb %xmm1, %edx |

708 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

709 | jnz L(less4_double_words_32) |

710 | |

711 | add $48, %rsi |

712 | add $48, %rdi |

713 | jmp L(continue_48_48) |

714 | |

715 | .p2align 4 |

716 | L(continue_0_16): /* Lead-in: one pointer is at offset 1..15 of a 64-byte block and the other at 17..31 (either order). Compare two 16-byte chunks with unaligned SSE loads, then advance both pointers by 32 so that the offsets become 33..47 and 49..63 and continue in L(continue_32_48). */ |

717 | movdqu (%rdi), %xmm1 |

718 | movdqu (%rsi), %xmm2 |

719 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

720 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

721 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

722 | pmovmskb %xmm1, %edx |

723 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

724 | jnz L(less4_double_words) |

725 | |

726 | movdqu 16(%rdi), %xmm1 |

727 | movdqu 16(%rsi), %xmm2 |

728 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

729 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

730 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

731 | pmovmskb %xmm1, %edx |

732 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

733 | jnz L(less4_double_words_16) |

734 | |

735 | add $32, %rsi |

736 | add $32, %rdi |

737 | jmp L(continue_32_48) |

738 | |

739 | .p2align 4 |

740 | L(continue_0_32): /* Lead-in: one pointer is at offset 1..15 of a 64-byte block and the other at 33..47 (either order). Compare one 16-byte chunk with unaligned SSE loads, then advance both pointers by 16 so that the offsets become 17..31 and 49..63 and continue in L(continue_16_48). */ |

741 | movdqu (%rdi), %xmm1 |

742 | movdqu (%rsi), %xmm2 |

743 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

744 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

745 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

746 | pmovmskb %xmm1, %edx |

747 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

748 | jnz L(less4_double_words) |

749 | |

750 | add $16, %rsi |

751 | add $16, %rdi |

752 | jmp L(continue_16_48) |

753 | |

754 | .p2align 4 |

755 | L(continue_16_32): /* Lead-in: one pointer is at offset 17..31 of a 64-byte block and the other at 33..47 (either order). Compare one 16-byte chunk with unaligned SSE loads, then advance both pointers by 16 so that the offsets become 33..47 and 49..63 and continue in L(continue_32_48). */ |

756 | movdqu (%rdi), %xmm1 |

757 | movdqu (%rsi), %xmm2 |

758 | pcmpeqd %xmm1, %xmm0 /* Any null double_word? */ |

759 | pcmpeqd %xmm2, %xmm1 /* compare first 4 double_words for equality */ |

760 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ |

761 | pmovmskb %xmm1, %edx |

762 | sub $0xffff, %edx /* edx == 0 iff all 4 double_words are equal and non-null */ |

763 | jnz L(less4_double_words) |

764 | |

765 | add $16, %rsi |

766 | add $16, %rdi |

767 | jmp L(continue_32_48) |

768 | |

769 | .p2align 4 |

770 | L(less4_double_words1): /* Tail for L(continue_00_48) and L(continue_48_00): the aligned side has a null somewhere in the current 4 double_words. On entry eax holds the first double_word at %rdi. Compare the double_words in order, returning as soon as one differs or an equal null is found. */ |

771 | cmp (%rsi), %eax /* signed flags for (%rdi) - (%rsi); L(nequal) consumes them */ |

772 | jne L(nequal) |

773 | test %eax, %eax |

774 | jz L(equal) /* equal and null: both strings end here */ |

775 | |

776 | mov 4(%rsi), %ecx |

777 | cmp %ecx, 4(%rdi) |

778 | jne L(nequal) |

779 | test %ecx, %ecx |

780 | jz L(equal) |

781 | |

782 | mov 8(%rsi), %ecx |

783 | cmp %ecx, 8(%rdi) |

784 | jne L(nequal) |

785 | test %ecx, %ecx |

786 | jz L(equal) |

787 | |

788 | mov 12(%rsi), %ecx |

789 | cmp %ecx, 12(%rdi) |

790 | jne L(nequal) |

791 | xor %eax, %eax /* the first three were equal and non-null, so the null is the fourth: equality here means the strings are equal */ |

792 | ret |

793 | |

794 | .p2align 4 |

795 | L(less4_double_words): /* Reached with edx = (pmovmskb mask - 0xffff), non-zero, for the 16 bytes at offset 0. In the mask each double_word owns 4 bits, all set iff that double_word is equal and non-null. Find the first double_word whose bits are clear and compare only that one. */ |

796 | xor %eax, %eax /* NOTE(review): every path below reloads %eax before returning, so this looks redundant */ |

797 | test %dl, %dl /* dl == 0: the first two double_words are equal and non-null */ |

798 | jz L(next_two_double_words) |

799 | and $15, %dl /* low nibble == 0: the first double_word is fine, so the second one is the culprit */ |

800 | jz L(second_double_word) |

801 | mov (%rdi), %eax |

802 | cmp (%rsi), %eax |

803 | jne L(nequal) |

804 | ret /* equal here means this double_word is the null, so eax already holds 0 */ |

805 | |

806 | .p2align 4 |

807 | L(second_double_word): |

808 | mov 4(%rdi), %eax |

809 | cmp 4(%rsi), %eax |

810 | jne L(nequal) |

811 | ret /* equal null: eax == 0 */ |

812 | |

813 | .p2align 4 |

814 | L(next_two_double_words): |

815 | and $15, %dh /* low nibble == 0: the third double_word is fine, so the fourth one is the culprit */ |

816 | jz L(fourth_double_word) |

817 | mov 8(%rdi), %eax |

818 | cmp 8(%rsi), %eax |

819 | jne L(nequal) |

820 | ret /* equal null: eax == 0 */ |

821 | |

822 | .p2align 4 |

823 | L(fourth_double_word): |

824 | mov 12(%rdi), %eax |

825 | cmp 12(%rsi), %eax |

826 | jne L(nequal) |

827 | ret /* equal null: eax == 0 */ |

828 | |

829 | .p2align 4 |

830 | L(less4_double_words_16): /* Reached with edx = (pmovmskb mask - 0xffff), non-zero, for the 16 bytes at offset 16. In the mask each double_word owns 4 bits, all set iff that double_word is equal and non-null. Find the first double_word whose bits are clear and compare only that one. */ |

831 | xor %eax, %eax /* NOTE(review): every path below reloads %eax before returning, so this looks redundant */ |

832 | test %dl, %dl /* dl == 0: the first two double_words of this chunk are equal and non-null */ |

833 | jz L(next_two_double_words_16) |

834 | and $15, %dl /* low nibble == 0: the first double_word is fine, so the second one is the culprit */ |

835 | jz L(second_double_word_16) |

836 | mov 16(%rdi), %eax |

837 | cmp 16(%rsi), %eax |

838 | jne L(nequal) |

839 | ret /* equal here means this double_word is the null, so eax already holds 0 */ |

840 | |

841 | .p2align 4 |

842 | L(second_double_word_16): |

843 | mov 20(%rdi), %eax |

844 | cmp 20(%rsi), %eax |

845 | jne L(nequal) |

846 | ret /* equal null: eax == 0 */ |

847 | |

848 | .p2align 4 |

849 | L(next_two_double_words_16): |

850 | and $15, %dh /* low nibble == 0: the third double_word is fine, so the fourth one is the culprit */ |

851 | jz L(fourth_double_word_16) |

852 | mov 24(%rdi), %eax |

853 | cmp 24(%rsi), %eax |

854 | jne L(nequal) |

855 | ret /* equal null: eax == 0 */ |

856 | |

857 | .p2align 4 |

858 | L(fourth_double_word_16): |

859 | mov 28(%rdi), %eax |

860 | cmp 28(%rsi), %eax |

861 | jne L(nequal) |

862 | ret /* equal null: eax == 0 */ |

863 | |

864 | .p2align 4 |

865 | L(less4_double_words_32): /* Reached with edx = (pmovmskb mask - 0xffff), non-zero, for the 16 bytes at offset 32. In the mask each double_word owns 4 bits, all set iff that double_word is equal and non-null. Find the first double_word whose bits are clear and compare only that one. */ |

866 | xor %eax, %eax /* NOTE(review): every path below reloads %eax before returning, so this looks redundant */ |

867 | test %dl, %dl /* dl == 0: the first two double_words of this chunk are equal and non-null */ |

868 | jz L(next_two_double_words_32) |

869 | and $15, %dl /* low nibble == 0: the first double_word is fine, so the second one is the culprit */ |

870 | jz L(second_double_word_32) |

871 | mov 32(%rdi), %eax |

872 | cmp 32(%rsi), %eax |

873 | jne L(nequal) |

874 | ret /* equal here means this double_word is the null, so eax already holds 0 */ |

875 | |

876 | .p2align 4 |

877 | L(second_double_word_32): |

878 | mov 36(%rdi), %eax |

879 | cmp 36(%rsi), %eax |

880 | jne L(nequal) |

881 | ret /* equal null: eax == 0 */ |

882 | |

883 | .p2align 4 |

884 | L(next_two_double_words_32): |

885 | and $15, %dh /* low nibble == 0: the third double_word is fine, so the fourth one is the culprit */ |

886 | jz L(fourth_double_word_32) |

887 | mov 40(%rdi), %eax |

888 | cmp 40(%rsi), %eax |

889 | jne L(nequal) |

890 | ret /* equal null: eax == 0 */ |

891 | |

892 | .p2align 4 |

893 | L(fourth_double_word_32): |

894 | mov 44(%rdi), %eax |

895 | cmp 44(%rsi), %eax |

896 | jne L(nequal) |

897 | ret /* equal null: eax == 0 */ |

898 | |

899 | .p2align 4 |

900 | L(less4_double_words_48): /* Reached with edx = (pmovmskb mask - 0xffff), non-zero, for the 16 bytes at offset 48. In the mask each double_word owns 4 bits, all set iff that double_word is equal and non-null. Find the first double_word whose bits are clear and compare only that one. */ |

901 | xor %eax, %eax /* NOTE(review): every path below reloads %eax before returning, so this looks redundant */ |

902 | test %dl, %dl /* dl == 0: the first two double_words of this chunk are equal and non-null */ |

903 | jz L(next_two_double_words_48) |

904 | and $15, %dl /* low nibble == 0: the first double_word is fine, so the second one is the culprit */ |

905 | jz L(second_double_word_48) |

906 | mov 48(%rdi), %eax |

907 | cmp 48(%rsi), %eax |

908 | jne L(nequal) |

909 | ret /* equal here means this double_word is the null, so eax already holds 0 */ |

910 | |

911 | .p2align 4 |

912 | L(second_double_word_48): |

913 | mov 52(%rdi), %eax |

914 | cmp 52(%rsi), %eax |

915 | jne L(nequal) |

916 | ret /* equal null: eax == 0 */ |

917 | |

918 | .p2align 4 |

919 | L(next_two_double_words_48): |

920 | and $15, %dh /* low nibble == 0: the third double_word is fine, so the fourth one is the culprit */ |

921 | jz L(fourth_double_word_48) |

922 | mov 56(%rdi), %eax |

923 | cmp 56(%rsi), %eax |

924 | jne L(nequal) |

925 | ret /* equal null: eax == 0 */ |

926 | |

927 | .p2align 4 |

928 | L(fourth_double_word_48): |

929 | mov 60(%rdi), %eax |

930 | cmp 60(%rsi), %eax |

931 | jne L(nequal) |

932 | ret /* equal null: eax == 0 */ |

933 | |

934 | .p2align 4 |

935 | L(nequal): /* Common exit for a mismatch. Must be entered with the flags of a cmp that computed (%rdi element) - (%rsi element); returns 1 if the %rdi element is greater as a signed value, otherwise -1. */ |

936 | mov $1, %eax /* mov leaves the flags untouched, so the jg below still sees the caller's cmp */ |

937 | jg L(nequal_bigger) /* signed greater: keep eax == 1 */ |

938 | neg %eax /* otherwise the result is -1 */ |

939 | |

940 | L(nequal_bigger): |

941 | ret |

942 | |

943 | .p2align 4 |

944 | L(equal): /* Common exit when an equal null double_word was found: the strings are equal, return 0. */ |

945 | xor %eax, %eax /* 32-bit xor also clears the upper half of %rax and needs no REX.W prefix; same form as the other zero-return in this function */ |

946 | ret |

947 | |

948 | END (__wcscmp) |

949 | #ifndef __wcscmp |

950 | libc_hidden_def (__wcscmp) |

951 | weak_alias (__wcscmp, wcscmp) |

952 | #endif |

953 |