1 | //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | |

9 | #ifndef LLVM_ADT_STRINGREF_H |

10 | #define LLVM_ADT_STRINGREF_H |

11 | |

12 | #include "llvm/ADT/STLExtras.h" |

13 | #include "llvm/ADT/iterator_range.h" |

14 | #include "llvm/Support/Compiler.h" |

15 | #include <algorithm> |

16 | #include <cassert> |

17 | #include <cstddef> |

18 | #include <cstring> |

19 | #include <limits> |

20 | #include <string> |

21 | #include <type_traits> |

22 | #include <utility> |

23 | |

24 | namespace llvm { |

25 | |

26 | class APInt; |

27 | class hash_code; |

28 | template <typename T> class SmallVectorImpl; |

29 | class StringRef; |

30 | |

31 | /// Helper functions for StringRef::getAsInteger. |

32 | bool getAsUnsignedInteger(StringRef Str, unsigned Radix, |

33 | unsigned long long &Result); |

34 | |

35 | bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result); |

36 | |

37 | bool consumeUnsignedInteger(StringRef &Str, unsigned Radix, |

38 | unsigned long long &Result); |

39 | bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result); |

40 | |

41 | /// StringRef - Represent a constant reference to a string, i.e. a character |

42 | /// array and a length, which need not be null terminated. |

43 | /// |

44 | /// This class does not own the string data, it is expected to be used in |

45 | /// situations where the character data resides in some other buffer, whose |

46 | /// lifetime extends past that of the StringRef. For this reason, it is not in |

47 | /// general safe to store a StringRef. |

48 | class StringRef { |

49 | public: |

50 | static const size_t npos = ~size_t(0); |

51 | |

52 | using iterator = const char *; |

53 | using const_iterator = const char *; |

54 | using size_type = size_t; |

55 | |

56 | private: |

57 | /// The start of the string, in an external buffer. |

58 | const char *Data = nullptr; |

59 | |

60 | /// The length of the string. |

61 | size_t Length = 0; |

62 | |

/// Byte-wise comparison of the first \p Length bytes of \p Lhs and \p Rhs.
///
/// Works around memcmp having undefined behavior when handed null pointers:
/// a zero-length comparison is answered here without touching either pointer.
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  return Length != 0 ? ::memcmp(Lhs, Rhs, Length) : 0;
}

69 | |

70 | public: |

71 | /// @name Constructors |

72 | /// @{ |

73 | |

74 | /// Construct an empty string ref. |

75 | /*implicit*/ StringRef() = default; |

76 | |

77 | /// Disable conversion from nullptr. This prevents things like |

78 | /// if (S == nullptr) |

79 | StringRef(std::nullptr_t) = delete; |

80 | |

81 | /// Construct a string ref from a cstring. |

82 | /*implicit*/ StringRef(const char *Str) |

83 | : Data(Str), Length(Str ? ::strlen(Str) : 0) {} |

84 | |

85 | /// Construct a string ref from a pointer and length. |

86 | /*implicit*/ constexpr StringRef(const char *data, size_t length) |

87 | : Data(data), Length(length) {} |

88 | |

89 | /// Construct a string ref from an std::string. |

90 | /*implicit*/ StringRef(const std::string &Str) |

91 | : Data(Str.data()), Length(Str.length()) {} |

92 | |

93 | static StringRef withNullAsEmpty(const char *data) { |

94 | return StringRef(data ? data : ""); |

95 | } |

96 | |

97 | /// @} |

98 | /// @name Iterators |

99 | /// @{ |

100 | |

101 | iterator begin() const { return Data; } |

102 | |

103 | iterator end() const { return Data + Length; } |

104 | |

105 | const unsigned char *bytes_begin() const { |

106 | return reinterpret_cast<const unsigned char *>(begin()); |

107 | } |

108 | const unsigned char *bytes_end() const { |

109 | return reinterpret_cast<const unsigned char *>(end()); |

110 | } |

111 | iterator_range<const unsigned char *> bytes() const { |

112 | return make_range(bytes_begin(), bytes_end()); |

113 | } |

114 | |

115 | /// @} |

116 | /// @name String Operations |

117 | /// @{ |

118 | |

119 | /// data - Get a pointer to the start of the string (which may not be null |

120 | /// terminated). |

121 | LLVM_NODISCARD |

122 | const char *data() const { return Data; } |

123 | |

124 | /// empty - Check if the string is empty. |

125 | LLVM_NODISCARD |

126 | bool empty() const { return Length == 0; } |

127 | |

128 | /// size - Get the string size. |

129 | LLVM_NODISCARD |

130 | size_t size() const { return Length; } |

131 | |

132 | /// front - Get the first character in the string. |

133 | LLVM_NODISCARD |

134 | char front() const { |

135 | assert(!empty()); |

136 | return Data[0]; |

137 | } |

138 | |

139 | /// back - Get the last character in the string. |

140 | LLVM_NODISCARD |

141 | char back() const { |

142 | assert(!empty()); |

143 | return Data[Length-1]; |

144 | } |

145 | |

146 | // copy - Allocate copy in Allocator and return StringRef to it. |

147 | template <typename Allocator> |

148 | LLVM_NODISCARD StringRef copy(Allocator &A) const { |

149 | // Don't request a length 0 copy from the allocator. |

150 | if (empty()) |

151 | return StringRef(); |

152 | char *S = A.template Allocate<char>(Length); |

153 | std::copy(begin(), end(), S); |

154 | return StringRef(S, Length); |

155 | } |

156 | |

157 | /// equals - Check for string equality, this is more efficient than |

158 | /// compare() when the relative ordering of inequal strings isn't needed. |

159 | LLVM_NODISCARD |

160 | bool equals(StringRef RHS) const { |

161 | return (Length == RHS.Length && |

162 | compareMemory(Data, RHS.Data, RHS.Length) == 0); |

163 | } |

164 | |

165 | /// equals_lower - Check for string equality, ignoring case. |

166 | LLVM_NODISCARD |

167 | bool equals_lower(StringRef RHS) const { |

168 | return Length == RHS.Length && compare_lower(RHS) == 0; |

169 | } |

170 | |

171 | /// compare - Compare two strings; the result is -1, 0, or 1 if this string |

172 | /// is lexicographically less than, equal to, or greater than the \p RHS. |

173 | LLVM_NODISCARD |

174 | int compare(StringRef RHS) const { |

175 | // Check the prefix for a mismatch. |

176 | if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) |

177 | return Res < 0 ? -1 : 1; |

178 | |

179 | // Otherwise the prefixes match, so we only need to check the lengths. |

180 | if (Length == RHS.Length) |

181 | return 0; |

182 | return Length < RHS.Length ? -1 : 1; |

183 | } |

184 | |

185 | /// compare_lower - Compare two strings, ignoring case. |

186 | LLVM_NODISCARD |

187 | int compare_lower(StringRef RHS) const; |

188 | |

189 | /// compare_numeric - Compare two strings, treating sequences of digits as |

190 | /// numbers. |

191 | LLVM_NODISCARD |

192 | int compare_numeric(StringRef RHS) const; |

193 | |

194 | /// Determine the edit distance between this string and another |

195 | /// string. |

196 | /// |

197 | /// \param Other the string to compare this string against. |

198 | /// |

199 | /// \param AllowReplacements whether to allow character |

200 | /// replacements (change one character into another) as a single |

201 | /// operation, rather than as two operations (an insertion and a |

202 | /// removal). |

203 | /// |

204 | /// \param MaxEditDistance If non-zero, the maximum edit distance that |

205 | /// this routine is allowed to compute. If the edit distance will exceed |

206 | /// that maximum, returns \c MaxEditDistance+1. |

207 | /// |

208 | /// \returns the minimum number of character insertions, removals, |

209 | /// or (if \p AllowReplacements is \c true) replacements needed to |

210 | /// transform one of the given strings into the other. If zero, |

211 | /// the strings are identical. |

212 | LLVM_NODISCARD |

213 | unsigned edit_distance(StringRef Other, bool AllowReplacements = true, |

214 | unsigned MaxEditDistance = 0) const; |

215 | |

216 | /// str - Get the contents as an std::string. |

217 | LLVM_NODISCARD |

218 | std::string str() const { |

219 | if (!Data) return std::string(); |

220 | return std::string(Data, Length); |

221 | } |

222 | |

223 | /// @} |

224 | /// @name Operator Overloads |

225 | /// @{ |

226 | |

227 | LLVM_NODISCARD |

228 | char operator[](size_t Index) const { |

229 | assert(Index < Length && "Invalid index!"); |

230 | return Data[Index]; |

231 | } |

232 | |

233 | /// Disallow accidental assignment from a temporary std::string. |

234 | /// |

235 | /// The declaration here is extra complicated so that `stringRef = {}` |

236 | /// and `stringRef = "abc"` continue to select the move assignment operator. |

237 | template <typename T> |

238 | typename std::enable_if<std::is_same<T, std::string>::value, |

239 | StringRef>::type & |

240 | operator=(T &&Str) = delete; |

241 | |

242 | /// @} |

243 | /// @name Type Conversions |

244 | /// @{ |

245 | |

246 | operator std::string() const { |

247 | return str(); |

248 | } |

249 | |

250 | /// @} |

251 | /// @name String Predicates |

252 | /// @{ |

253 | |

254 | /// Check if this string starts with the given \p Prefix. |

255 | LLVM_NODISCARD |

256 | bool startswith(StringRef Prefix) const { |

257 | return Length >= Prefix.Length && |

258 | compareMemory(Data, Prefix.Data, Prefix.Length) == 0; |

259 | } |

260 | |

261 | /// Check if this string starts with the given \p Prefix, ignoring case. |

262 | LLVM_NODISCARD |

263 | bool startswith_lower(StringRef Prefix) const; |

264 | |

265 | /// Check if this string ends with the given \p Suffix. |

266 | LLVM_NODISCARD |

267 | bool endswith(StringRef Suffix) const { |

268 | return Length >= Suffix.Length && |

269 | compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; |

270 | } |

271 | |

272 | /// Check if this string ends with the given \p Suffix, ignoring case. |

273 | LLVM_NODISCARD |

274 | bool endswith_lower(StringRef Suffix) const; |

275 | |

276 | /// @} |

277 | /// @name String Searching |

278 | /// @{ |

279 | |

280 | /// Search for the first character \p C in the string. |

281 | /// |

282 | /// \returns The index of the first occurrence of \p C, or npos if not |

283 | /// found. |

284 | LLVM_NODISCARD |

285 | size_t find(char C, size_t From = 0) const { |

286 | size_t FindBegin = std::min(From, Length); |

287 | if (FindBegin < Length) { // Avoid calling memchr with nullptr. |

288 | // Just forward to memchr, which is faster than a hand-rolled loop. |

289 | if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin)) |

290 | return static_cast<const char *>(P) - Data; |

291 | } |

292 | return npos; |

293 | } |

294 | |

295 | /// Search for the first character \p C in the string, ignoring case. |

296 | /// |

297 | /// \returns The index of the first occurrence of \p C, or npos if not |

298 | /// found. |

299 | LLVM_NODISCARD |

300 | size_t find_lower(char C, size_t From = 0) const; |

301 | |

302 | /// Search for the first character satisfying the predicate \p F |

303 | /// |

304 | /// \returns The index of the first character satisfying \p F starting from |

305 | /// \p From, or npos if not found. |

306 | LLVM_NODISCARD |

307 | size_t find_if(function_ref<bool(char)> F, size_t From = 0) const { |

308 | StringRef S = drop_front(From); |

309 | while (!S.empty()) { |

310 | if (F(S.front())) |

311 | return size() - S.size(); |

312 | S = S.drop_front(); |

313 | } |

314 | return npos; |

315 | } |

316 | |

317 | /// Search for the first character not satisfying the predicate \p F |

318 | /// |

319 | /// \returns The index of the first character not satisfying \p F starting |

320 | /// from \p From, or npos if not found. |

321 | LLVM_NODISCARD |

322 | size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const { |

323 | return find_if([F](char c) { return !F(c); }, From); |

324 | } |

325 | |

326 | /// Search for the first string \p Str in the string. |

327 | /// |

328 | /// \returns The index of the first occurrence of \p Str, or npos if not |

329 | /// found. |

330 | LLVM_NODISCARD |

331 | size_t find(StringRef Str, size_t From = 0) const; |

332 | |

333 | /// Search for the first string \p Str in the string, ignoring case. |

334 | /// |

335 | /// \returns The index of the first occurrence of \p Str, or npos if not |

336 | /// found. |

337 | LLVM_NODISCARD |

338 | size_t find_lower(StringRef Str, size_t From = 0) const; |

339 | |

340 | /// Search for the last character \p C in the string. |

341 | /// |

342 | /// \returns The index of the last occurrence of \p C, or npos if not |

343 | /// found. |

344 | LLVM_NODISCARD |

345 | size_t rfind(char C, size_t From = npos) const { |

346 | From = std::min(From, Length); |

347 | size_t i = From; |

348 | while (i != 0) { |

349 | --i; |

350 | if (Data[i] == C) |

351 | return i; |

352 | } |

353 | return npos; |

354 | } |

355 | |

356 | /// Search for the last character \p C in the string, ignoring case. |

357 | /// |

358 | /// \returns The index of the last occurrence of \p C, or npos if not |

359 | /// found. |

360 | LLVM_NODISCARD |

361 | size_t rfind_lower(char C, size_t From = npos) const; |

362 | |

363 | /// Search for the last string \p Str in the string. |

364 | /// |

365 | /// \returns The index of the last occurrence of \p Str, or npos if not |

366 | /// found. |

367 | LLVM_NODISCARD |

368 | size_t rfind(StringRef Str) const; |

369 | |

370 | /// Search for the last string \p Str in the string, ignoring case. |

371 | /// |

372 | /// \returns The index of the last occurrence of \p Str, or npos if not |

373 | /// found. |

374 | LLVM_NODISCARD |

375 | size_t rfind_lower(StringRef Str) const; |

376 | |

377 | /// Find the first character in the string that is \p C, or npos if not |

378 | /// found. Same as find. |

379 | LLVM_NODISCARD |

380 | size_t find_first_of(char C, size_t From = 0) const { |

381 | return find(C, From); |

382 | } |

383 | |

384 | /// Find the first character in the string that is in \p Chars, or npos if |

385 | /// not found. |

386 | /// |

387 | /// Complexity: O(size() + Chars.size()) |

388 | LLVM_NODISCARD |

389 | size_t find_first_of(StringRef Chars, size_t From = 0) const; |

390 | |

391 | /// Find the first character in the string that is not \p C or npos if not |

392 | /// found. |

393 | LLVM_NODISCARD |

394 | size_t find_first_not_of(char C, size_t From = 0) const; |

395 | |

396 | /// Find the first character in the string that is not in the string |

397 | /// \p Chars, or npos if not found. |

398 | /// |

399 | /// Complexity: O(size() + Chars.size()) |

400 | LLVM_NODISCARD |

401 | size_t find_first_not_of(StringRef Chars, size_t From = 0) const; |

402 | |

403 | /// Find the last character in the string that is \p C, or npos if not |

404 | /// found. |

405 | LLVM_NODISCARD |

406 | size_t find_last_of(char C, size_t From = npos) const { |

407 | return rfind(C, From); |

408 | } |

409 | |

410 | /// Find the last character in the string that is in \p Chars, or npos if |

411 | /// not found. |

412 | /// |

413 | /// Complexity: O(size() + Chars.size()) |

414 | LLVM_NODISCARD |

415 | size_t find_last_of(StringRef Chars, size_t From = npos) const; |

416 | |

417 | /// Find the last character in the string that is not \p C, or npos if not |

418 | /// found. |

419 | LLVM_NODISCARD |

420 | size_t find_last_not_of(char C, size_t From = npos) const; |

421 | |

422 | /// Find the last character in the string that is not in \p Chars, or |

423 | /// npos if not found. |

424 | /// |

425 | /// Complexity: O(size() + Chars.size()) |

426 | LLVM_NODISCARD |

427 | size_t find_last_not_of(StringRef Chars, size_t From = npos) const; |

428 | |

429 | /// Return true if the given string is a substring of *this, and false |

430 | /// otherwise. |

431 | LLVM_NODISCARD |

432 | bool contains(StringRef Other) const { return find(Other) != npos; } |

433 | |

434 | /// Return true if the given character is contained in *this, and false |

435 | /// otherwise. |

436 | LLVM_NODISCARD |

437 | bool contains(char C) const { return find_first_of(C) != npos; } |

438 | |

439 | /// Return true if the given string is a substring of *this, and false |

440 | /// otherwise. |

441 | LLVM_NODISCARD |

442 | bool contains_lower(StringRef Other) const { |

443 | return find_lower(Other) != npos; |

444 | } |

445 | |

446 | /// Return true if the given character is contained in *this, and false |

447 | /// otherwise. |

448 | LLVM_NODISCARD |

449 | bool contains_lower(char C) const { return find_lower(C) != npos; } |

450 | |

451 | /// @} |

452 | /// @name Helpful Algorithms |

453 | /// @{ |

454 | |

455 | /// Return the number of occurrences of \p C in the string. |

456 | LLVM_NODISCARD |

457 | size_t count(char C) const { |

458 | size_t Count = 0; |

459 | for (size_t i = 0, e = Length; i != e; ++i) |

460 | if (Data[i] == C) |

461 | ++Count; |

462 | return Count; |

463 | } |

464 | |

465 | /// Return the number of non-overlapped occurrences of \p Str in |

466 | /// the string. |

467 | size_t count(StringRef Str) const; |

468 | |

469 | /// Parse the current string as an integer of the specified radix. If |

470 | /// \p Radix is specified as zero, this does radix autosensing using |

471 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

472 | /// |

473 | /// If the string is invalid or if only a subset of the string is valid, |

474 | /// this returns true to signify the error. The string is considered |

475 | /// erroneous if empty or if it overflows T. |

476 | template <typename T> |

477 | typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type |

478 | getAsInteger(unsigned Radix, T &Result) const { |

479 | long long LLVal; |

480 | if (getAsSignedInteger(*this, Radix, LLVal) || |

481 | static_cast<T>(LLVal) != LLVal) |

482 | return true; |

483 | Result = LLVal; |

484 | return false; |

485 | } |

486 | |

487 | template <typename T> |

488 | typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type |

489 | getAsInteger(unsigned Radix, T &Result) const { |

490 | unsigned long long ULLVal; |

491 | // The additional cast to unsigned long long is required to avoid the |

492 | // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type |

493 | // 'unsigned __int64' when instantiating getAsInteger with T = bool. |

494 | if (getAsUnsignedInteger(*this, Radix, ULLVal) || |

495 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

496 | return true; |

497 | Result = ULLVal; |

498 | return false; |

499 | } |

500 | |

501 | /// Parse the current string as an integer of the specified radix. If |

502 | /// \p Radix is specified as zero, this does radix autosensing using |

503 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

504 | /// |

505 | /// If the string does not begin with a number of the specified radix, |

506 | /// this returns true to signify the error. The string is considered |

507 | /// erroneous if empty or if it overflows T. |

508 | /// The portion of the string representing the discovered numeric value |

509 | /// is removed from the beginning of the string. |

510 | template <typename T> |

511 | typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type |

512 | consumeInteger(unsigned Radix, T &Result) { |

513 | long long LLVal; |

514 | if (consumeSignedInteger(*this, Radix, LLVal) || |

515 | static_cast<long long>(static_cast<T>(LLVal)) != LLVal) |

516 | return true; |

517 | Result = LLVal; |

518 | return false; |

519 | } |

520 | |

521 | template <typename T> |

522 | typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type |

523 | consumeInteger(unsigned Radix, T &Result) { |

524 | unsigned long long ULLVal; |

525 | if (consumeUnsignedInteger(*this, Radix, ULLVal) || |

526 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

527 | return true; |

528 | Result = ULLVal; |

529 | return false; |

530 | } |

531 | |

532 | /// Parse the current string as an integer of the specified \p Radix, or of |

533 | /// an autosensed radix if the \p Radix given is 0. The current value in |

534 | /// \p Result is discarded, and the storage is changed to be wide enough to |

535 | /// store the parsed integer. |

536 | /// |

537 | /// \returns true if the string does not solely consist of a valid |

538 | /// non-empty number in the appropriate base. |

539 | /// |

540 | /// APInt::fromString is superficially similar but assumes the |

541 | /// string is well-formed in the given radix. |

542 | bool getAsInteger(unsigned Radix, APInt &Result) const; |

543 | |

544 | /// Parse the current string as an IEEE double-precision floating |

545 | /// point value. The string must be a well-formed double. |

546 | /// |

547 | /// If \p AllowInexact is false, the function will fail if the string |

548 | /// cannot be represented exactly. Otherwise, the function only fails |

549 | /// in case of an overflow or underflow. |

550 | bool getAsDouble(double &Result, bool AllowInexact = true) const; |

551 | |

552 | /// @} |

553 | /// @name String Operations |

554 | /// @{ |

555 | |

556 | /// Convert the given ASCII string to lowercase. |

557 | LLVM_NODISCARD |

558 | std::string lower() const; |

559 | |

560 | /// Convert the given ASCII string to uppercase. |

561 | LLVM_NODISCARD |

562 | std::string upper() const; |

563 | |

564 | /// @} |

565 | /// @name Substring Operations |

566 | /// @{ |

567 | |

568 | /// Return a reference to the substring from [Start, Start + N). |

569 | /// |

570 | /// \param Start The index of the starting character in the substring; if |

571 | /// the index is npos or greater than the length of the string then the |

572 | /// empty substring will be returned. |

573 | /// |

574 | /// \param N The number of characters to included in the substring. If N |

575 | /// exceeds the number of characters remaining in the string, the string |

576 | /// suffix (starting with \p Start) will be returned. |

577 | LLVM_NODISCARD |

578 | StringRef substr(size_t Start, size_t N = npos) const { |

579 | Start = std::min(Start, Length); |

580 | return StringRef(Data + Start, std::min(N, Length - Start)); |

581 | } |

582 | |

583 | /// Return a StringRef equal to 'this' but with only the first \p N |

584 | /// elements remaining. If \p N is greater than the length of the |

585 | /// string, the entire string is returned. |

586 | LLVM_NODISCARD |

587 | StringRef take_front(size_t N = 1) const { |

588 | if (N >= size()) |

589 | return *this; |

590 | return drop_back(size() - N); |

591 | } |

592 | |

593 | /// Return a StringRef equal to 'this' but with only the last \p N |

594 | /// elements remaining. If \p N is greater than the length of the |

595 | /// string, the entire string is returned. |

596 | LLVM_NODISCARD |

597 | StringRef take_back(size_t N = 1) const { |

598 | if (N >= size()) |

599 | return *this; |

600 | return drop_front(size() - N); |

601 | } |

602 | |

603 | /// Return the longest prefix of 'this' such that every character |

604 | /// in the prefix satisfies the given predicate. |

605 | LLVM_NODISCARD |

606 | StringRef take_while(function_ref<bool(char)> F) const { |

607 | return substr(0, find_if_not(F)); |

608 | } |

609 | |

610 | /// Return the longest prefix of 'this' such that no character in |

611 | /// the prefix satisfies the given predicate. |

612 | LLVM_NODISCARD |

613 | StringRef take_until(function_ref<bool(char)> F) const { |

614 | return substr(0, find_if(F)); |

615 | } |

616 | |

617 | /// Return a StringRef equal to 'this' but with the first \p N elements |

618 | /// dropped. |

619 | LLVM_NODISCARD |

620 | StringRef drop_front(size_t N = 1) const { |

621 | assert(size() >= N && "Dropping more elements than exist"); |

622 | return substr(N); |

623 | } |

624 | |

625 | /// Return a StringRef equal to 'this' but with the last \p N elements |

626 | /// dropped. |

627 | LLVM_NODISCARD |

628 | StringRef drop_back(size_t N = 1) const { |

629 | assert(size() >= N && "Dropping more elements than exist"); |

630 | return substr(0, size()-N); |

631 | } |

632 | |

633 | /// Return a StringRef equal to 'this', but with all characters satisfying |

634 | /// the given predicate dropped from the beginning of the string. |

635 | LLVM_NODISCARD |

636 | StringRef drop_while(function_ref<bool(char)> F) const { |

637 | return substr(find_if_not(F)); |

638 | } |

639 | |

640 | /// Return a StringRef equal to 'this', but with all characters not |

641 | /// satisfying the given predicate dropped from the beginning of the string. |

642 | LLVM_NODISCARD |

643 | StringRef drop_until(function_ref<bool(char)> F) const { |

644 | return substr(find_if(F)); |

645 | } |

646 | |

647 | /// Returns true if this StringRef has the given prefix and removes that |

648 | /// prefix. |

649 | bool consume_front(StringRef Prefix) { |

650 | if (!startswith(Prefix)) |

651 | return false; |

652 | |

653 | *this = drop_front(Prefix.size()); |

654 | return true; |

655 | } |

656 | |

657 | /// Returns true if this StringRef has the given suffix and removes that |

658 | /// suffix. |

659 | bool consume_back(StringRef Suffix) { |

660 | if (!endswith(Suffix)) |

661 | return false; |

662 | |

663 | *this = drop_back(Suffix.size()); |

664 | return true; |

665 | } |

666 | |

667 | /// Return a reference to the substring from [Start, End). |

668 | /// |

669 | /// \param Start The index of the starting character in the substring; if |

670 | /// the index is npos or greater than the length of the string then the |

671 | /// empty substring will be returned. |

672 | /// |

673 | /// \param End The index following the last character to include in the |

674 | /// substring. If this is npos or exceeds the number of characters |

675 | /// remaining in the string, the string suffix (starting with \p Start) |

676 | /// will be returned. If this is less than \p Start, an empty string will |

677 | /// be returned. |

678 | LLVM_NODISCARD |

679 | StringRef slice(size_t Start, size_t End) const { |

680 | Start = std::min(Start, Length); |

681 | End = std::min(std::max(Start, End), Length); |

682 | return StringRef(Data + Start, End - Start); |

683 | } |

684 | |

685 | /// Split into two substrings around the first occurrence of a separator |

686 | /// character. |

687 | /// |

688 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

689 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

690 | /// maximal. If \p Separator is not in the string, then the result is a |

691 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

692 | /// |

693 | /// \param Separator The character to split on. |

694 | /// \returns The split substrings. |

695 | LLVM_NODISCARD |

696 | std::pair<StringRef, StringRef> split(char Separator) const { |

697 | return split(StringRef(&Separator, 1)); |

698 | } |

699 | |

700 | /// Split into two substrings around the first occurrence of a separator |

701 | /// string. |

702 | /// |

703 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

704 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

705 | /// maximal. If \p Separator is not in the string, then the result is a |

706 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

707 | /// |

708 | /// \param Separator - The string to split on. |

709 | /// \return - The split substrings. |

710 | LLVM_NODISCARD |

711 | std::pair<StringRef, StringRef> split(StringRef Separator) const { |

712 | size_t Idx = find(Separator); |

713 | if (Idx == npos) |

714 | return std::make_pair(*this, StringRef()); |

715 | return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); |

716 | } |

717 | |

718 | /// Split into two substrings around the last occurrence of a separator |

719 | /// string. |

720 | /// |

721 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

722 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

723 | /// minimal. If \p Separator is not in the string, then the result is a |

724 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

725 | /// |

726 | /// \param Separator - The string to split on. |

727 | /// \return - The split substrings. |

728 | LLVM_NODISCARD |

729 | std::pair<StringRef, StringRef> rsplit(StringRef Separator) const { |

730 | size_t Idx = rfind(Separator); |

731 | if (Idx == npos) |

732 | return std::make_pair(*this, StringRef()); |

733 | return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); |

734 | } |

735 | |

736 | /// Split into substrings around the occurrences of a separator string. |

737 | /// |

738 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |

739 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |

740 | /// elements are added to A. |

741 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |

742 | /// still count when considering \p MaxSplit |

743 | /// A useful invariant is that |

744 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |

745 | /// |

746 | /// \param A - Where to put the substrings. |

747 | /// \param Separator - The string to split on. |

748 | /// \param MaxSplit - The maximum number of times the string is split. |

749 | /// \param KeepEmpty - True if empty substring should be added. |

750 | void split(SmallVectorImpl<StringRef> &A, |

751 | StringRef Separator, int MaxSplit = -1, |

752 | bool KeepEmpty = true) const; |

753 | |

754 | /// Split into substrings around the occurrences of a separator character. |

755 | /// |

756 | /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most |

757 | /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1 |

758 | /// elements are added to A. |

759 | /// If \p KeepEmpty is false, empty strings are not added to \p A. They |

760 | /// still count when considering \p MaxSplit |

761 | /// A useful invariant is that |

762 | /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true |

763 | /// |

764 | /// \param A - Where to put the substrings. |

765 | /// \param Separator - The string to split on. |

766 | /// \param MaxSplit - The maximum number of times the string is split. |

767 | /// \param KeepEmpty - True if empty substring should be added. |

768 | void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, |

769 | bool KeepEmpty = true) const; |

770 | |

771 | /// Split into two substrings around the last occurrence of a separator |

772 | /// character. |

773 | /// |

774 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

775 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

776 | /// minimal. If \p Separator is not in the string, then the result is a |

777 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

778 | /// |

779 | /// \param Separator - The character to split on. |

780 | /// \return - The split substrings. |

781 | LLVM_NODISCARD |

782 | std::pair<StringRef, StringRef> rsplit(char Separator) const { |

783 | return rsplit(StringRef(&Separator, 1)); |

784 | } |

785 | |

786 | /// Return string with consecutive \p Char characters starting from the |

787 | /// the left removed. |

788 | LLVM_NODISCARD |

789 | StringRef ltrim(char Char) const { |

790 | return drop_front(std::min(Length, find_first_not_of(Char))); |

791 | } |

792 | |

793 | /// Return string with consecutive characters in \p Chars starting from |

794 | /// the left removed. |

795 | LLVM_NODISCARD |

796 | StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { |

797 | return drop_front(std::min(Length, find_first_not_of(Chars))); |

798 | } |

799 | |

800 | /// Return string with consecutive \p Char characters starting from the |

801 | /// right removed. |

802 | LLVM_NODISCARD |

803 | StringRef rtrim(char Char) const { |

804 | return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); |

805 | } |

806 | |

807 | /// Return string with consecutive characters in \p Chars starting from |

808 | /// the right removed. |

809 | LLVM_NODISCARD |

810 | StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { |

811 | return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); |

812 | } |

813 | |

814 | /// Return string with consecutive \p Char characters starting from the |

815 | /// left and right removed. |

816 | LLVM_NODISCARD |

817 | StringRef trim(char Char) const { |

818 | return ltrim(Char).rtrim(Char); |

819 | } |

820 | |

821 | /// Return string with consecutive characters in \p Chars starting from |

822 | /// the left and right removed. |

823 | LLVM_NODISCARD |

824 | StringRef trim(StringRef Chars = " \t\n\v\f\r") const { |

825 | return ltrim(Chars).rtrim(Chars); |

826 | } |

827 | |

828 | /// @} |

829 | }; |

830 | |

/// A wrapper around a string literal that serves as a proxy for constructing
/// global tables of StringRefs with the length computed at compile time.
/// In order to avoid the invocation of a global constructor, StringLiteral
/// should *only* be used in a constexpr context, as such:
///
/// constexpr StringLiteral S("test");
///
class StringLiteral : public StringRef {
private:
  /// Wrap \p N characters starting at \p Str with no validation of the
  /// contents. Private; withInnerNUL below is its caller in this class.
  constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
  }

public:
  /// Construct from a character array of \p N elements, referencing the
  /// first N - 1 of them (i.e. excluding the array's final element, which
  /// for a string literal is the terminating NUL).
  ///
  /// When built with a clang that supports the enable_if attribute, this
  /// overload is only viable if __builtin_strlen(Str) == N - 1, which rules
  /// out arrays with an embedded NUL. The surrounding pragmas silence
  /// -Wgcc-compat for that attribute.
  template <size_t N>
  constexpr StringLiteral(const char (&Str)[N])
#if defined(__clang__) && __has_attribute(enable_if)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgcc-compat"
  __attribute((enable_if(__builtin_strlen(Str) == N - 1,
  "invalid string literal")))
#pragma clang diagnostic pop
#endif
  : StringRef(Str, N - 1) {
  }

  // Explicit construction for strings like "foo\0bar".
  // Bypasses the strlen check above and keeps all N - 1 characters.
  template <size_t N>
  static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
  return StringLiteral(Str, N - 1);
  }
};

862 | |

863 | /// @name StringRef Comparison Operators |

864 | /// @{ |

865 | |

866 | inline bool operator==(StringRef LHS, StringRef RHS) { |

867 | return LHS.equals(RHS); |

868 | } |

869 | |

870 | inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } |

871 | |

872 | inline bool operator<(StringRef LHS, StringRef RHS) { |

873 | return LHS.compare(RHS) == -1; |

874 | } |

875 | |

876 | inline bool operator<=(StringRef LHS, StringRef RHS) { |

877 | return LHS.compare(RHS) != 1; |

878 | } |

879 | |

880 | inline bool operator>(StringRef LHS, StringRef RHS) { |

881 | return LHS.compare(RHS) == 1; |

882 | } |

883 | |

884 | inline bool operator>=(StringRef LHS, StringRef RHS) { |

885 | return LHS.compare(RHS) != -1; |

886 | } |

887 | |

888 | inline std::string &operator+=(std::string &buffer, StringRef string) { |

889 | return buffer.append(string.data(), string.size()); |

890 | } |

891 | |

892 | /// @} |

893 | |

894 | /// Compute a hash_code for a StringRef. |

895 | LLVM_NODISCARD |

896 | hash_code hash_value(StringRef S); |

897 | |

898 | } // end namespace llvm |

899 | |

900 | #endif // LLVM_ADT_STRINGREF_H |

901 |