1 | /* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR. |
---|---|

2 | Copyright (C) 2013-2019 Free Software Foundation, Inc. |

3 | This file is part of the GNU C Library. |

4 | |

5 | The GNU C Library is free software; you can redistribute it and/or |

6 | modify it under the terms of the GNU Lesser General Public |

7 | License as published by the Free Software Foundation; either |

8 | version 2.1 of the License, or (at your option) any later version. |

9 | |

10 | The GNU C Library is distributed in the hope that it will be useful, |

11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

13 | Lesser General Public License for more details. |

14 | |

15 | You should have received a copy of the GNU Lesser General Public |

16 | License along with the GNU C Library; if not, see |

17 | <http://www.gnu.org/licenses/>. */ |

18 | |

19 | |

20 | #include <sysdep.h> |

21 | |

22 | .text |
/* strrchr (str, ch): the code below reads str from %rdi and ch from
   %esi, and returns in %rax the address of the last byte of str that
   equals the low byte of ch, or 0 when there is none.  The terminator
   itself is eligible, so ch == 0 yields the address of the NUL.
   Registers written: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r10,
   %xmm0-%xmm7 and the flags.

   Mask convention: pmovmskb gives one bit per byte of a 16-byte
   compare result; four such masks shifted by 0/16/32/48 are OR-ed
   into one 64-bit mask whose bit i describes byte i of a 64-byte
   window.  "Match mask" = bytes equal to ch, "NUL mask" = bytes
   equal to 0.  For a non-zero NUL mask m, (m - 1) ^ m has exactly
   the bits up to and including the lowest set bit of m, i.e. it
   selects the bytes up to and including the first NUL.  */
23 | ENTRY (strrchr) |
/* Low dword of xmm1 = ch.  */
24 | movd %esi, %xmm1 |

25 | movq %rdi, %rax |
/* rax = str & 4095, the offset of str within a 4096-byte block.  */
26 | andl $4095, %eax |
/* Broadcast step 1: low byte of ch duplicated into the low word.  */
27 | punpcklbw %xmm1, %xmm1 |
/* Flags from this compare are consumed by the ja below; the two SSE
   instructions in between do not modify flags.  */
28 | cmpq $4032, %rax |
/* Broadcast step 2: low word duplicated into the low dword.  */
29 | punpcklwd %xmm1, %xmm1 |
/* Broadcast step 3: xmm1 = 16 copies of the low byte of ch.  */
30 | pshufd $0, %xmm1, %xmm1 |
/* Offset > 4032: a 64-byte read starting at str would extend past the
   4096-byte boundary, so take the path that reads an aligned block.  */
31 | ja L(cross_page) |
/* Bytes 0..15 of str (unaligned load).  */
32 | movdqu (%rdi), %xmm0 |
/* xmm2 = 0, the comparand for NUL detection.  */
33 | pxor %xmm2, %xmm2 |

34 | movdqa %xmm0, %xmm3 |
/* xmm0 = 0xff where byte == ch.  */
35 | pcmpeqb %xmm1, %xmm0 |
/* xmm3 = 0xff where byte == 0.  */
36 | pcmpeqb %xmm2, %xmm3 |
/* ecx = match mask, edx = NUL mask for bytes 0..15.  */
37 | pmovmskb %xmm0, %ecx |

38 | pmovmskb %xmm3, %edx |

39 | testq %rdx, %rdx |
/* No terminator in bytes 0..15: go examine bytes 16..63.  */
40 | je L(next_48_bytes) |
/* rax = (rdx - 1) ^ rdx: bits up to and including the first NUL.  */
41 | leaq -1(%rdx), %rax |

42 | xorq %rdx, %rax |
/* Keep only matches at or before the terminator.  */
43 | andq %rcx, %rax |
/* No such match: return 0.  */
44 | je L(exit) |
/* Index of the highest set bit = offset of the last match.  */
45 | bsrq %rax, %rax |

46 | addq %rdi, %rax |

47 | ret |

48 | |

49 | .p2align 4 |
/* Reached when bytes 0..15 hold no NUL.  On entry rcx = match mask of
   bytes 0..15 and xmm2 = 0.  Builds rsi = match mask of bytes 0..63
   and rax = NUL mask of bytes 16..63 (unaligned loads from str).  */
50 | L(next_48_bytes): |
/* xmm4 = bytes 16..31.  */
51 | movdqu 16(%rdi), %xmm4 |

52 | movdqa %xmm4, %xmm5 |
/* xmm3 = bytes 32..47.  */
53 | movdqu 32(%rdi), %xmm3 |
/* xmm4 = matches in bytes 16..31.  */
54 | pcmpeqb %xmm1, %xmm4 |
/* xmm5 = NULs in bytes 16..31.  */
55 | pcmpeqb %xmm2, %xmm5 |
/* xmm0 = bytes 48..63.  */
56 | movdqu 48(%rdi), %xmm0 |
/* edx = NUL mask of bytes 16..31.  */
57 | pmovmskb %xmm5, %edx |

58 | movdqa %xmm3, %xmm5 |
/* xmm3 = matches in bytes 32..47.  */
59 | pcmpeqb %xmm1, %xmm3 |
/* xmm5 = NULs in bytes 32..47.  */
60 | pcmpeqb %xmm2, %xmm5 |
/* xmm2 = NULs in bytes 48..63; this overwrites the zero vector, which
   is not needed again on this path.  */
61 | pcmpeqb %xmm0, %xmm2 |

62 | salq $16, %rdx |
/* r8d = match mask of bytes 32..47.  */
63 | pmovmskb %xmm3, %r8d |
/* eax = NUL mask of bytes 32..47.  */
64 | pmovmskb %xmm5, %eax |
/* esi = NUL mask of bytes 48..63 (held in rsi only temporarily).  */
65 | pmovmskb %xmm2, %esi |

66 | salq $32, %r8 |

67 | salq $32, %rax |
/* xmm0 = matches in bytes 48..63.  */
68 | pcmpeqb %xmm1, %xmm0 |
/* rax = NUL mask of bytes 16..47.  */
69 | orq %rdx, %rax |
/* Move the 48..63 NUL mask to rdx so rsi can take the match mask.  */
70 | movq %rsi, %rdx |
/* esi = match mask of bytes 16..31.  */
71 | pmovmskb %xmm4, %esi |

72 | salq $48, %rdx |

73 | salq $16, %rsi |

74 | orq %r8, %rsi |
/* Merge in the matches from bytes 0..15.  */
75 | orq %rcx, %rsi |
/* ecx = match mask of bytes 48..63.  */
76 | pmovmskb %xmm0, %ecx |

77 | salq $48, %rcx |
/* rsi = match mask of bytes 0..63.  */
78 | orq %rcx, %rsi |
/* rax = NUL mask of bytes 16..63; ZF is set when it is empty.  */
79 | orq %rdx, %rax |
/* No terminator in the first 64 bytes: enter the aligned loop.  */
80 | je L(loop_header2) |
/* rcx = (rax - 1) ^ rax: bits up to and including the first NUL.  */
81 | leaq -1(%rax), %rcx |

82 | xorq %rax, %rcx |
/* Drop matches that lie after the terminator.  */
83 | andq %rcx, %rsi |
/* Nothing left: return 0.  */
84 | je L(exit) |
/* Offset of the last match, then its address.  */
85 | bsrq %rsi, %rsi |

86 | leaq (%rdi,%rsi), %rax |

87 | ret |

88 | |

89 | .p2align 4 |
/* On entry: rdi = str, rsi = match mask for bytes already scanned with
   bit 0 corresponding to str[0]; no terminator has been seen yet.  */
90 | L(loop_header2): |

91 | testq %rsi, %rsi |
/* rcx = base address that the bit offsets in rsi are relative to.
   movq leaves the flags from testq intact for the je below.  */
92 | movq %rdi, %rcx |
/* No match so far: install the "not found" sentinel first.  */
93 | je L(no_c_found) |

94 | L(loop_header): |

95 | addq $64, %rdi |
/* xmm7 = 0, the NUL comparand used inside the loop.  */
96 | pxor %xmm7, %xmm7 |
/* rdi = (str + 64) rounded down to a multiple of 64, so the loop can
   use aligned loads.  */
97 | andq $-64, %rdi |

98 | jmp L(loop_entry) |

99 | |

100 | .p2align 4 |
/* Loop over 64-byte aligned blocks.  rcx/rsi describe the last match
   seen so far (address = rcx + index of highest set bit of rsi); rsi
   is never 0 here.  Reached from the bottom of the loop when the block
   at rdi held no NUL, with rdx = that block's match mask.  */
101 | L(loop64): |

102 | testq %rdx, %rdx |
/* If the block just scanned had matches, it becomes the new candidate:
   remember its mask and its base address.  */
103 | cmovne %rdx, %rsi |

104 | cmovne %rdi, %rcx |
/* Advance to the next 64-byte block.  */
105 | addq $64, %rdi |

106 | L(loop_entry): |
/* xmm3 = bytes 32..47 of the block (aligned load).  */
107 | movdqa 32(%rdi), %xmm3 |
/* xmm6 = 0, used for the per-vector NUL compares after the loop.  */
108 | pxor %xmm6, %xmm6 |
/* xmm2 = bytes 48..63.  */
109 | movdqa 48(%rdi), %xmm2 |

110 | movdqa %xmm3, %xmm0 |
/* xmm4 = bytes 16..31.  */
111 | movdqa 16(%rdi), %xmm4 |
/* xmm0 accumulates the unsigned byte-wise minimum of the four vectors;
   a lane is 0 iff one of the four bytes in that lane is 0.  */
112 | pminub %xmm2, %xmm0 |
/* xmm5 = bytes 0..15.  */
113 | movdqa (%rdi), %xmm5 |

114 | pminub %xmm4, %xmm0 |

115 | pminub %xmm5, %xmm0 |

116 | pcmpeqb %xmm7, %xmm0 |
/* eax != 0 iff the 64-byte block contains a NUL somewhere.  */
117 | pmovmskb %xmm0, %eax |
/* Build rdx = match mask of the whole block, 16 bytes at a time.  */
118 | movdqa %xmm5, %xmm0 |

119 | pcmpeqb %xmm1, %xmm0 |
/* r9d = match mask of bytes 0..15.  */
120 | pmovmskb %xmm0, %r9d |

121 | movdqa %xmm4, %xmm0 |

122 | pcmpeqb %xmm1, %xmm0 |
/* edx = match mask of bytes 16..31.  */
123 | pmovmskb %xmm0, %edx |

124 | movdqa %xmm3, %xmm0 |

125 | pcmpeqb %xmm1, %xmm0 |

126 | salq $16, %rdx |
/* r10d = match mask of bytes 32..47.  */
127 | pmovmskb %xmm0, %r10d |

128 | movdqa %xmm2, %xmm0 |

129 | pcmpeqb %xmm1, %xmm0 |

130 | salq $32, %r10 |

131 | orq %r10, %rdx |
/* r8d = match mask of bytes 48..63.  */
132 | pmovmskb %xmm0, %r8d |

133 | orq %r9, %rdx |

134 | salq $48, %r8 |
/* rdx = match mask of bytes 0..63 of this block.  */
135 | orq %r8, %rdx |

136 | testl %eax, %eax |
/* No NUL in this block: record matches and continue.  */
137 | je L(loop64) |
/* The block contains the terminator.  Build rax = exact 64-bit NUL mask
   from the four vectors still held in xmm5/xmm4/xmm3/xmm2.  */
138 | pcmpeqb %xmm6, %xmm4 |

139 | pcmpeqb %xmm6, %xmm3 |

140 | pcmpeqb %xmm6, %xmm5 |

141 | pmovmskb %xmm4, %eax |

142 | pmovmskb %xmm3, %r10d |

143 | pcmpeqb %xmm6, %xmm2 |

144 | pmovmskb %xmm5, %r9d |

145 | salq $32, %r10 |

146 | salq $16, %rax |

147 | pmovmskb %xmm2, %r8d |

148 | orq %r10, %rax |

149 | orq %r9, %rax |

150 | salq $48, %r8 |
/* rax = NUL mask of bytes 0..63 of this block (non-zero here).  */
151 | orq %r8, %rax |
/* r8 = (rax - 1) ^ rax: bits up to and including the first NUL.  */
152 | leaq -1(%rax), %r8 |

153 | xorq %rax, %r8 |
/* rdx = matches in this block at or before the terminator.  */
154 | andq %r8, %rdx |
/* If there are any, this block supersedes the remembered candidate.  */
155 | cmovne %rdi, %rcx |

156 | cmovne %rdx, %rsi |
/* Result = base + offset of the last match.  With the sentinel from
   L(no_c_found) still in place this evaluates to 0 + 0 = 0.  */
157 | bsrq %rsi, %rsi |

158 | leaq (%rcx,%rsi), %rax |

159 | ret |

160 | |

161 | .p2align 4 |
/* No match before the loop: set rsi = 1 and rcx = 0 so that, unless a
   later block replaces them, the final bsrq/leaq produce a 0 result.  */
162 | L(no_c_found): |

163 | movl $1, %esi |

164 | xorl %ecx, %ecx |

165 | jmp L(loop_header) |

166 | |

167 | .p2align 4 |
/* Terminator reached with no match before it: return 0.  */
168 | L(exit): |

169 | xorl %eax, %eax |

170 | ret |

171 | |

172 | .p2align 4 |
/* Entry path when (str & 4095) > 4032.  Reads the 64-byte aligned block
   that contains str instead of 64 bytes starting at str, builds the
   64-bit NUL mask (rdx) and match mask (rsi) of that block, then shifts
   both right so that bit 0 corresponds to str[0].  */
173 | L(cross_page): |

174 | movq %rdi, %rax |
/* xmm0 = 0, the NUL comparand.  */
175 | pxor %xmm0, %xmm0 |
/* rax = str rounded down to a multiple of 64.  */
176 | andq $-64, %rax |
/* xmm5 = bytes 0..15 of the aligned block.  */
177 | movdqu (%rax), %xmm5 |

178 | movdqa %xmm5, %xmm6 |
/* xmm4 = bytes 16..31.  */
179 | movdqu 16(%rax), %xmm4 |
/* xmm5 = matches in bytes 0..15.  */
180 | pcmpeqb %xmm1, %xmm5 |
/* xmm6 = NULs in bytes 0..15.  */
181 | pcmpeqb %xmm0, %xmm6 |
/* xmm3 = bytes 32..47.  */
182 | movdqu 32(%rax), %xmm3 |
/* esi = NUL mask of bytes 0..15 (held in rsi only temporarily).  */
183 | pmovmskb %xmm6, %esi |

184 | movdqa %xmm4, %xmm6 |
/* xmm2 = bytes 48..63.  */
185 | movdqu 48(%rax), %xmm2 |
/* xmm4 = matches in bytes 16..31.  */
186 | pcmpeqb %xmm1, %xmm4 |
/* xmm6 = NULs in bytes 16..31.  */
187 | pcmpeqb %xmm0, %xmm6 |
/* edx = NUL mask of bytes 16..31.  */
188 | pmovmskb %xmm6, %edx |

189 | movdqa %xmm3, %xmm6 |
/* xmm3 = matches in bytes 32..47.  */
190 | pcmpeqb %xmm1, %xmm3 |
/* xmm6 = NULs in bytes 32..47.  */
191 | pcmpeqb %xmm0, %xmm6 |
/* xmm0 = NULs in bytes 48..63 (overwrites the zero vector).  */
192 | pcmpeqb %xmm2, %xmm0 |

193 | salq $16, %rdx |
/* r9d = match mask of bytes 32..47.  */
194 | pmovmskb %xmm3, %r9d |
/* r8d = NUL mask of bytes 32..47.  */
195 | pmovmskb %xmm6, %r8d |
/* ecx = NUL mask of bytes 48..63.  */
196 | pmovmskb %xmm0, %ecx |

197 | salq $32, %r9 |

198 | salq $32, %r8 |
/* xmm2 = matches in bytes 48..63.  */
199 | pcmpeqb %xmm1, %xmm2 |

200 | orq %r8, %rdx |

201 | salq $48, %rcx |
/* r8d = match mask of bytes 0..15.  */
202 | pmovmskb %xmm5, %r8d |

203 | orq %rsi, %rdx |
/* esi = match mask of bytes 16..31; rsi now switches to match duty.  */
204 | pmovmskb %xmm4, %esi |
/* rdx = NUL mask of bytes 0..63 of the aligned block.  */
205 | orq %rcx, %rdx |
/* ecx = match mask of bytes 48..63.  */
206 | pmovmskb %xmm2, %ecx |

207 | salq $16, %rsi |

208 | salq $48, %rcx |

209 | orq %r9, %rsi |

210 | orq %r8, %rsi |
/* rsi = match mask of bytes 0..63 of the aligned block.  */
211 | orq %rcx, %rsi |
/* cl = str - aligned base, the number of leading bytes to discard.  */
212 | movl %edi, %ecx |

213 | subl %eax, %ecx |
/* Shift out the bytes that precede str; bit 0 now means str[0].  */
214 | shrq %cl, %rdx |

215 | shrq %cl, %rsi |

216 | testq %rdx, %rdx |
/* No terminator between str and the end of the block: enter the loop.  */
217 | je L(loop_header2) |
/* rax = (rdx - 1) ^ rdx: bits up to and including the first NUL.  */
218 | leaq -1(%rdx), %rax |

219 | xorq %rdx, %rax |
/* Keep only matches at or before the terminator.  */
220 | andq %rax, %rsi |
/* None: return 0.  */
221 | je L(exit) |
/* Offset of the last match relative to str, then its address.  */
222 | bsrq %rsi, %rax |

223 | addq %rdi, %rax |

224 | ret |

225 | END (strrchr) |

226 | |

/* NOTE(review): weak_alias and libc_hidden_builtin_def are macros defined
   outside this file (presumably reached through <sysdep.h>).  Judging
   by their names, the first publishes rindex as a weak alias of strrchr
   and the second sets up a hidden internal definition of strrchr --
   confirm against the macro definitions.  */
227 | weak_alias (strrchr, rindex) |

228 | libc_hidden_builtin_def (strrchr) |

229 |