1 | //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | |

9 | #ifndef LLVM_ADT_STRINGREF_H |

10 | #define LLVM_ADT_STRINGREF_H |

11 | |

12 | #include "llvm/ADT/STLExtras.h" |

13 | #include "llvm/ADT/iterator_range.h" |

14 | #include "llvm/Support/Compiler.h" |

15 | #include <algorithm> |

16 | #include <cassert> |

17 | #include <cstddef> |

18 | #include <cstring> |

19 | #include <limits> |

20 | #include <string> |

21 | #if __cplusplus > 201402L |

22 | #include <string_view> |

23 | #endif |

24 | #include <type_traits> |

25 | #include <utility> |

26 | |

// Declare the __builtin_strlen intrinsic for MSVC so it can be used in
// constexpr context.
#if defined(_MSC_VER)
// The space after "C" is required: a string literal immediately followed by
// an identifier is lexed as a single user-defined string literal, which is
// not a valid linkage specification.
extern "C" size_t __builtin_strlen(const char *);
#endif

32 | |

33 | namespace llvm { |

34 | |

// Forward declarations of types that are named in signatures in this header
// before (or without) being defined here.

// Named by the APInt overload of StringRef::getAsInteger.
class APInt;
// NOTE(review): not referenced in the visible part of this header.
class hash_code;
// Output container type taken by the vector-filling StringRef::split overloads.
template <typename T> class SmallVectorImpl;
// Needed by the free helper prototypes that precede the class definition.
class StringRef;

39 | |

/// Helper functions for StringRef::getAsInteger.
///
/// Each parses \p Str using \p Radix and writes the value to \p Result.
/// StringRef::getAsInteger treats a \c true return value as failure.
bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
unsigned long long &Result);

bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);

/// Helper functions for StringRef::consumeInteger.
///
/// These take \p Str by non-const reference so the parsed number can be
/// removed from its front. StringRef::consumeInteger treats a \c true return
/// value as failure.
bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
unsigned long long &Result);
bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);

49 | |

/// StringRef - Represent a constant reference to a string, i.e. a character
/// array and a length, which need not be null terminated.
///
/// This class does not own the string data; it is expected to be used in
/// situations where the character data resides in some other buffer whose
/// lifetime extends past that of the StringRef. For this reason, it is not
/// generally safe to store a StringRef.

57 | class LLVM_GSL_POINTER StringRef { |

58 | public: |

/// Sentinel index with every bit set (the largest size_t). The search
/// functions of this class return it when nothing is found.
static constexpr size_t npos = ~size_t(0);

// Iterators are plain pointers into the referenced buffer; both aliases are
// read-only.
using iterator = const char *;
using const_iterator = const char *;
using size_type = size_t;

private:
/// The start of the string, in an external buffer. Null for a
/// default-constructed StringRef.
const char *Data = nullptr;

/// The length of the string. Zero for a default-constructed StringRef.
size_t Length = 0;

71 | |

// memcmp wrapper that is safe for empty ranges.
//
// Passing a null pointer to memcmp is undefined behavior even when the length
// is zero, so a zero-length comparison is answered here without calling it.
// Returns memcmp's result otherwise (negative, zero, or positive).
static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
  return Length != 0 ? ::memcmp(Lhs, Rhs, Length) : 0;
}

78 | |

// Length of the null-terminated string \p Str, usable in constant
// expressions. \p Str must not be null.
static constexpr size_t strLen(const char *Str) {
#if __cplusplus > 201402L
  // C++17 and later: char_traits<char>::length is constexpr.
  return std::char_traits<char>::length(Str);
#elif __has_builtin(__builtin_strlen) || defined(__GNUC__) || \
    (defined(_MSC_VER) && _MSC_VER >= 1916)
  // Older language modes: rely on the compiler intrinsic.
  return __builtin_strlen(Str);
#else
  // Last resort: walk to the terminator by hand.
  const char *Cursor = Str;
  for (; *Cursor != '\0'; ++Cursor) {
  }
  return Cursor - Str;
#endif
}

93 | |

94 | public: |

95 | /// @name Constructors |

96 | /// @{ |

97 | |

/// Construct an empty string ref.
///
/// The in-class member initializers apply, so Data is null and Length is 0.
/*implicit*/ StringRef() = default;

100 | |

/// Disable conversion from nullptr. This prevents things like
/// if (S == nullptr)
///
/// Without this deleted overload, a literal nullptr would convert through
/// the const char * constructor.
StringRef(std::nullptr_t) = delete;

104 | |

105 | /// Construct a string ref from a cstring. |

106 | /*implicit*/ constexpr StringRef(const char *Str) |

107 | : Data(Str), Length(Str ? strLen(Str) : 0) {} |

108 | |

109 | /// Construct a string ref from a pointer and length. |

110 | /*implicit*/ constexpr StringRef(const char *data, size_t length) |

111 | : Data(data), Length(length) {} |

112 | |

113 | /// Construct a string ref from an std::string. |

114 | /*implicit*/ StringRef(const std::string &Str) |

115 | : Data(Str.data()), Length(Str.length()) {} |

116 | |

117 | #if __cplusplus > 201402L |

118 | /// Construct a string ref from an std::string_view. |

119 | /*implicit*/ constexpr StringRef(std::string_view Str) |

120 | : Data(Str.data()), Length(Str.size()) {} |

121 | #endif |

122 | |

123 | static StringRef withNullAsEmpty(const char *data) { |

124 | return StringRef(data ? data : ""); |

125 | } |

126 | |

127 | /// @} |

128 | /// @name Iterators |

129 | /// @{ |

130 | |

131 | iterator begin() const { return Data; } |

132 | |

133 | iterator end() const { return Data + Length; } |

134 | |

135 | const unsigned char *bytes_begin() const { |

136 | return reinterpret_cast<const unsigned char *>(begin()); |

137 | } |

138 | const unsigned char *bytes_end() const { |

139 | return reinterpret_cast<const unsigned char *>(end()); |

140 | } |

141 | iterator_range<const unsigned char *> bytes() const { |

142 | return make_range(bytes_begin(), bytes_end()); |

143 | } |

144 | |

145 | /// @} |

146 | /// @name String Operations |

147 | /// @{ |

148 | |

149 | /// data - Get a pointer to the start of the string (which may not be null |

150 | /// terminated). |

151 | LLVM_NODISCARD |

152 | const char *data() const { return Data; } |

153 | |

154 | /// empty - Check if the string is empty. |

155 | LLVM_NODISCARD |

156 | bool empty() const { return Length == 0; } |

157 | |

158 | /// size - Get the string size. |

159 | LLVM_NODISCARD |

160 | size_t size() const { return Length; } |

161 | |

162 | /// front - Get the first character in the string. |

163 | LLVM_NODISCARD |

164 | char front() const { |

165 | assert(!empty()); |

166 | return Data[0]; |

167 | } |

168 | |

169 | /// back - Get the last character in the string. |

170 | LLVM_NODISCARD |

171 | char back() const { |

172 | assert(!empty()); |

173 | return Data[Length-1]; |

174 | } |

175 | |

176 | // copy - Allocate copy in Allocator and return StringRef to it. |

177 | template <typename Allocator> |

178 | LLVM_NODISCARD StringRef copy(Allocator &A) const { |

179 | // Don't request a length 0 copy from the allocator. |

180 | if (empty()) |

181 | return StringRef(); |

182 | char *S = A.template Allocate<char>(Length); |

183 | std::copy(begin(), end(), S); |

184 | return StringRef(S, Length); |

185 | } |

186 | |

187 | /// equals - Check for string equality, this is more efficient than |

188 | /// compare() when the relative ordering of inequal strings isn't needed. |

189 | LLVM_NODISCARD |

190 | bool equals(StringRef RHS) const { |

191 | return (Length == RHS.Length && |

192 | compareMemory(Data, RHS.Data, RHS.Length) == 0); |

193 | } |

194 | |

195 | /// equals_lower - Check for string equality, ignoring case. |

196 | LLVM_NODISCARD |

197 | bool equals_lower(StringRef RHS) const { |

198 | return Length == RHS.Length && compare_lower(RHS) == 0; |

199 | } |

200 | |

201 | /// compare - Compare two strings; the result is -1, 0, or 1 if this string |

202 | /// is lexicographically less than, equal to, or greater than the \p RHS. |

203 | LLVM_NODISCARD |

204 | int compare(StringRef RHS) const { |

205 | // Check the prefix for a mismatch. |

206 | if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) |

207 | return Res < 0 ? -1 : 1; |

208 | |

209 | // Otherwise the prefixes match, so we only need to check the lengths. |

210 | if (Length == RHS.Length) |

211 | return 0; |

212 | return Length < RHS.Length ? -1 : 1; |

213 | } |

214 | |

/// compare_lower - Compare two strings, ignoring case.
///
/// equals_lower() relies on a result of 0 meaning the strings match.
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
int compare_lower(StringRef RHS) const;

/// compare_numeric - Compare two strings, treating sequences of digits as
/// numbers.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
int compare_numeric(StringRef RHS) const;

223 | |

/// Determine the edit distance between this string and another
/// string.
///
/// \param Other the string to compare this string against.
///
/// \param AllowReplacements whether to allow character
/// replacements (change one character into another) as a single
/// operation, rather than as two operations (an insertion and a
/// removal).
///
/// \param MaxEditDistance If non-zero, the maximum edit distance that
/// this routine is allowed to compute. If the edit distance will exceed
/// that maximum, returns \c MaxEditDistance+1. The default of 0 therefore
/// requests no cap.
///
/// \returns the minimum number of character insertions, removals,
/// or (if \p AllowReplacements is \c true) replacements needed to
/// transform one of the given strings into the other. If zero,
/// the strings are identical.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;

245 | |

246 | /// str - Get the contents as an std::string. |

247 | LLVM_NODISCARD |

248 | std::string str() const { |

249 | if (!Data) return std::string(); |

250 | return std::string(Data, Length); |

251 | } |

252 | |

253 | /// @} |

254 | /// @name Operator Overloads |

255 | /// @{ |

256 | |

257 | LLVM_NODISCARD |

258 | char operator[](size_t Index) const { |

259 | assert(Index < Length && "Invalid index!"); |

260 | return Data[Index]; |

261 | } |

262 | |

/// Disallow accidental assignment from a temporary std::string.
///
/// The declaration here is extra complicated so that `stringRef = {}`
/// and `stringRef = "abc"` continue to select the move assignment operator.
///
/// The overload takes part in resolution only when T deduces to exactly
/// std::string, i.e. for an rvalue std::string argument. An lvalue deduces
/// T as std::string&, fails the is_same test, and is handled by the ordinary
/// assignment path.
template <typename T>
std::enable_if_t<std::is_same<T, std::string>::value, StringRef> &
operator=(T &&Str) = delete;

270 | |

271 | /// @} |

272 | /// @name Type Conversions |

273 | /// @{ |

274 | |

275 | explicit operator std::string() const { return str(); } |

276 | |

277 | #if __cplusplus > 201402L |

278 | operator std::string_view() const { |

279 | return std::string_view(data(), size()); |

280 | } |

281 | #endif |

282 | |

283 | /// @} |

284 | /// @name String Predicates |

285 | /// @{ |

286 | |

287 | /// Check if this string starts with the given \p Prefix. |

288 | LLVM_NODISCARD |

289 | bool startswith(StringRef Prefix) const { |

290 | return Length >= Prefix.Length && |

291 | compareMemory(Data, Prefix.Data, Prefix.Length) == 0; |

292 | } |

293 | |

/// Check if this string starts with the given \p Prefix, ignoring case.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
bool startswith_lower(StringRef Prefix) const;

297 | |

298 | /// Check if this string ends with the given \p Suffix. |

299 | LLVM_NODISCARD |

300 | bool endswith(StringRef Suffix) const { |

301 | return Length >= Suffix.Length && |

302 | compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; |

303 | } |

304 | |

/// Check if this string ends with the given \p Suffix, ignoring case.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
bool endswith_lower(StringRef Suffix) const;

308 | |

309 | /// @} |

310 | /// @name String Searching |

311 | /// @{ |

312 | |

313 | /// Search for the first character \p C in the string. |

314 | /// |

315 | /// \returns The index of the first occurrence of \p C, or npos if not |

316 | /// found. |

317 | LLVM_NODISCARD |

318 | size_t find(char C, size_t From = 0) const { |

319 | size_t FindBegin = std::min(From, Length); |

320 | if (FindBegin < Length) { // Avoid calling memchr with nullptr. |

321 | // Just forward to memchr, which is faster than a hand-rolled loop. |

322 | if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin)) |

323 | return static_cast<const char *>(P) - Data; |

324 | } |

325 | return npos; |

326 | } |

327 | |

/// Search for the first character \p C in the string, ignoring case.
///
/// \returns The index of the first occurrence of \p C, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_lower(char C, size_t From = 0) const;

334 | |

335 | /// Search for the first character satisfying the predicate \p F |

336 | /// |

337 | /// \returns The index of the first character satisfying \p F starting from |

338 | /// \p From, or npos if not found. |

339 | LLVM_NODISCARD |

340 | size_t find_if(function_ref<bool(char)> F, size_t From = 0) const { |

341 | StringRef S = drop_front(From); |

342 | while (!S.empty()) { |

343 | if (F(S.front())) |

344 | return size() - S.size(); |

345 | S = S.drop_front(); |

346 | } |

347 | return npos; |

348 | } |

349 | |

350 | /// Search for the first character not satisfying the predicate \p F |

351 | /// |

352 | /// \returns The index of the first character not satisfying \p F starting |

353 | /// from \p From, or npos if not found. |

354 | LLVM_NODISCARD |

355 | size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const { |

356 | return find_if([F](char c) { return !F(c); }, From); |

357 | } |

358 | |

/// Search for the first string \p Str in the string.
///
/// \returns The index of the first occurrence of \p Str, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find(StringRef Str, size_t From = 0) const;

/// Search for the first string \p Str in the string, ignoring case.
///
/// \returns The index of the first occurrence of \p Str, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_lower(StringRef Str, size_t From = 0) const;

372 | |

373 | /// Search for the last character \p C in the string. |

374 | /// |

375 | /// \returns The index of the last occurrence of \p C, or npos if not |

376 | /// found. |

377 | LLVM_NODISCARD |

378 | size_t rfind(char C, size_t From = npos) const { |

379 | From = std::min(From, Length); |

380 | size_t i = From; |

381 | while (i != 0) { |

382 | --i; |

383 | if (Data[i] == C) |

384 | return i; |

385 | } |

386 | return npos; |

387 | } |

388 | |

/// Search for the last character \p C in the string, ignoring case.
///
/// \returns The index of the last occurrence of \p C, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t rfind_lower(char C, size_t From = npos) const;

/// Search for the last string \p Str in the string.
///
/// \returns The index of the last occurrence of \p Str, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t rfind(StringRef Str) const;

/// Search for the last string \p Str in the string, ignoring case.
///
/// \returns The index of the last occurrence of \p Str, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t rfind_lower(StringRef Str) const;

409 | |

410 | /// Find the first character in the string that is \p C, or npos if not |

411 | /// found. Same as find. |

412 | LLVM_NODISCARD |

413 | size_t find_first_of(char C, size_t From = 0) const { |

414 | return find(C, From); |

415 | } |

416 | |

/// Find the first character in the string that is in \p Chars, or npos if
/// not found.
///
/// Complexity: O(size() + Chars.size())
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_first_of(StringRef Chars, size_t From = 0) const;

/// Find the first character in the string that is not \p C or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_first_not_of(char C, size_t From = 0) const;

/// Find the first character in the string that is not in the string
/// \p Chars, or npos if not found.
///
/// Complexity: O(size() + Chars.size())
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_first_not_of(StringRef Chars, size_t From = 0) const;

435 | |

436 | /// Find the last character in the string that is \p C, or npos if not |

437 | /// found. |

438 | LLVM_NODISCARD |

439 | size_t find_last_of(char C, size_t From = npos) const { |

440 | return rfind(C, From); |

441 | } |

442 | |

/// Find the last character in the string that is in \p Chars, or npos if
/// not found.
///
/// Complexity: O(size() + Chars.size())
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_last_of(StringRef Chars, size_t From = npos) const;

/// Find the last character in the string that is not \p C, or npos if not
/// found.
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_last_not_of(char C, size_t From = npos) const;

/// Find the last character in the string that is not in \p Chars, or
/// npos if not found.
///
/// Complexity: O(size() + Chars.size())
///
/// Declaration only; the definition is not in this part of the file.
LLVM_NODISCARD
size_t find_last_not_of(StringRef Chars, size_t From = npos) const;

461 | |

462 | /// Return true if the given string is a substring of *this, and false |

463 | /// otherwise. |

464 | LLVM_NODISCARD |

465 | bool contains(StringRef Other) const { return find(Other) != npos; } |

466 | |

467 | /// Return true if the given character is contained in *this, and false |

468 | /// otherwise. |

469 | LLVM_NODISCARD |

470 | bool contains(char C) const { return find_first_of(C) != npos; } |

471 | |

472 | /// Return true if the given string is a substring of *this, and false |

473 | /// otherwise. |

474 | LLVM_NODISCARD |

475 | bool contains_lower(StringRef Other) const { |

476 | return find_lower(Other) != npos; |

477 | } |

478 | |

479 | /// Return true if the given character is contained in *this, and false |

480 | /// otherwise. |

481 | LLVM_NODISCARD |

482 | bool contains_lower(char C) const { return find_lower(C) != npos; } |

483 | |

484 | /// @} |

485 | /// @name Helpful Algorithms |

486 | /// @{ |

487 | |

488 | /// Return the number of occurrences of \p C in the string. |

489 | LLVM_NODISCARD |

490 | size_t count(char C) const { |

491 | size_t Count = 0; |

492 | for (size_t i = 0, e = Length; i != e; ++i) |

493 | if (Data[i] == C) |

494 | ++Count; |

495 | return Count; |

496 | } |

497 | |

/// Return the number of non-overlapped occurrences of \p Str in
/// the string.
///
/// Declaration only; the definition is not in this part of the file.
size_t count(StringRef Str) const;

501 | |

502 | /// Parse the current string as an integer of the specified radix. If |

503 | /// \p Radix is specified as zero, this does radix autosensing using |

504 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

505 | /// |

506 | /// If the string is invalid or if only a subset of the string is valid, |

507 | /// this returns true to signify the error. The string is considered |

508 | /// erroneous if empty or if it overflows T. |

509 | template <typename T> |

510 | std::enable_if_t<std::numeric_limits<T>::is_signed, bool> |

511 | getAsInteger(unsigned Radix, T &Result) const { |

512 | long long LLVal; |

513 | if (getAsSignedInteger(*this, Radix, LLVal) || |

514 | static_cast<T>(LLVal) != LLVal) |

515 | return true; |

516 | Result = LLVal; |

517 | return false; |

518 | } |

519 | |

520 | template <typename T> |

521 | std::enable_if_t<!std::numeric_limits<T>::is_signed, bool> |

522 | getAsInteger(unsigned Radix, T &Result) const { |

523 | unsigned long long ULLVal; |

524 | // The additional cast to unsigned long long is required to avoid the |

525 | // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type |

526 | // 'unsigned __int64' when instantiating getAsInteger with T = bool. |

527 | if (getAsUnsignedInteger(*this, Radix, ULLVal) || |

528 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

529 | return true; |

530 | Result = ULLVal; |

531 | return false; |

532 | } |

533 | |

534 | /// Parse the current string as an integer of the specified radix. If |

535 | /// \p Radix is specified as zero, this does radix autosensing using |

536 | /// extended C rules: 0 is octal, 0x is hex, 0b is binary. |

537 | /// |

538 | /// If the string does not begin with a number of the specified radix, |

539 | /// this returns true to signify the error. The string is considered |

540 | /// erroneous if empty or if it overflows T. |

541 | /// The portion of the string representing the discovered numeric value |

542 | /// is removed from the beginning of the string. |

543 | template <typename T> |

544 | std::enable_if_t<std::numeric_limits<T>::is_signed, bool> |

545 | consumeInteger(unsigned Radix, T &Result) { |

546 | long long LLVal; |

547 | if (consumeSignedInteger(*this, Radix, LLVal) || |

548 | static_cast<long long>(static_cast<T>(LLVal)) != LLVal) |

549 | return true; |

550 | Result = LLVal; |

551 | return false; |

552 | } |

553 | |

554 | template <typename T> |

555 | std::enable_if_t<!std::numeric_limits<T>::is_signed, bool> |

556 | consumeInteger(unsigned Radix, T &Result) { |

557 | unsigned long long ULLVal; |

558 | if (consumeUnsignedInteger(*this, Radix, ULLVal) || |

559 | static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal) |

560 | return true; |

561 | Result = ULLVal; |

562 | return false; |

563 | } |

564 | |

/// Parse the current string as an integer of the specified \p Radix, or of
/// an autosensed radix if the \p Radix given is 0. The current value in
/// \p Result is discarded, and the storage is changed to be wide enough to
/// store the parsed integer.
///
/// \returns true if the string does not solely consist of a valid
/// non-empty number in the appropriate base.
///
/// APInt::fromString is superficially similar but assumes the
/// string is well-formed in the given radix.
///
/// Declaration only; the definition is not in this part of the file.
bool getAsInteger(unsigned Radix, APInt &Result) const;

/// Parse the current string as an IEEE double-precision floating
/// point value. The string must be a well-formed double.
///
/// If \p AllowInexact is false, the function will fail if the string
/// cannot be represented exactly. Otherwise, the function only fails
/// in case of an overflow or underflow, or an invalid floating point
/// representation.
///
/// NOTE(review): presumably returns true on failure like the getAsInteger
/// overloads — confirm against the definition, which is not in this part of
/// the file.
bool getAsDouble(double &Result, bool AllowInexact = true) const;

585 | |

586 | /// @} |

587 | /// @name String Operations |

588 | /// @{ |

589 | |

/// Convert the given ASCII string to lowercase.
///
/// Returns a new std::string; declaration only, the definition is not in
/// this part of the file.
LLVM_NODISCARD
std::string lower() const;

/// Convert the given ASCII string to uppercase.
///
/// Returns a new std::string; declaration only, the definition is not in
/// this part of the file.
LLVM_NODISCARD
std::string upper() const;

597 | |

598 | /// @} |

599 | /// @name Substring Operations |

600 | /// @{ |

601 | |

602 | /// Return a reference to the substring from [Start, Start + N). |

603 | /// |

604 | /// \param Start The index of the starting character in the substring; if |

605 | /// the index is npos or greater than the length of the string then the |

606 | /// empty substring will be returned. |

607 | /// |

608 | /// \param N The number of characters to included in the substring. If N |

609 | /// exceeds the number of characters remaining in the string, the string |

610 | /// suffix (starting with \p Start) will be returned. |

611 | LLVM_NODISCARD |

612 | StringRef substr(size_t Start, size_t N = npos) const { |

613 | Start = std::min(Start, Length); |

614 | return StringRef(Data + Start, std::min(N, Length - Start)); |

615 | } |

616 | |

617 | /// Return a StringRef equal to 'this' but with only the first \p N |

618 | /// elements remaining. If \p N is greater than the length of the |

619 | /// string, the entire string is returned. |

620 | LLVM_NODISCARD |

621 | StringRef take_front(size_t N = 1) const { |

622 | if (N >= size()) |

623 | return *this; |

624 | return drop_back(size() - N); |

625 | } |

626 | |

627 | /// Return a StringRef equal to 'this' but with only the last \p N |

628 | /// elements remaining. If \p N is greater than the length of the |

629 | /// string, the entire string is returned. |

630 | LLVM_NODISCARD |

631 | StringRef take_back(size_t N = 1) const { |

632 | if (N >= size()) |

633 | return *this; |

634 | return drop_front(size() - N); |

635 | } |

636 | |

637 | /// Return the longest prefix of 'this' such that every character |

638 | /// in the prefix satisfies the given predicate. |

639 | LLVM_NODISCARD |

640 | StringRef take_while(function_ref<bool(char)> F) const { |

641 | return substr(0, find_if_not(F)); |

642 | } |

643 | |

644 | /// Return the longest prefix of 'this' such that no character in |

645 | /// the prefix satisfies the given predicate. |

646 | LLVM_NODISCARD |

647 | StringRef take_until(function_ref<bool(char)> F) const { |

648 | return substr(0, find_if(F)); |

649 | } |

650 | |

651 | /// Return a StringRef equal to 'this' but with the first \p N elements |

652 | /// dropped. |

653 | LLVM_NODISCARD |

654 | StringRef drop_front(size_t N = 1) const { |

655 | assert(size() >= N && "Dropping more elements than exist"); |

656 | return substr(N); |

657 | } |

658 | |

659 | /// Return a StringRef equal to 'this' but with the last \p N elements |

660 | /// dropped. |

661 | LLVM_NODISCARD |

662 | StringRef drop_back(size_t N = 1) const { |

663 | assert(size() >= N && "Dropping more elements than exist"); |

664 | return substr(0, size()-N); |

665 | } |

666 | |

667 | /// Return a StringRef equal to 'this', but with all characters satisfying |

668 | /// the given predicate dropped from the beginning of the string. |

669 | LLVM_NODISCARD |

670 | StringRef drop_while(function_ref<bool(char)> F) const { |

671 | return substr(find_if_not(F)); |

672 | } |

673 | |

674 | /// Return a StringRef equal to 'this', but with all characters not |

675 | /// satisfying the given predicate dropped from the beginning of the string. |

676 | LLVM_NODISCARD |

677 | StringRef drop_until(function_ref<bool(char)> F) const { |

678 | return substr(find_if(F)); |

679 | } |

680 | |

681 | /// Returns true if this StringRef has the given prefix and removes that |

682 | /// prefix. |

683 | bool consume_front(StringRef Prefix) { |

684 | if (!startswith(Prefix)) |

685 | return false; |

686 | |

687 | *this = drop_front(Prefix.size()); |

688 | return true; |

689 | } |

690 | |

691 | /// Returns true if this StringRef has the given suffix and removes that |

692 | /// suffix. |

693 | bool consume_back(StringRef Suffix) { |

694 | if (!endswith(Suffix)) |

695 | return false; |

696 | |

697 | *this = drop_back(Suffix.size()); |

698 | return true; |

699 | } |

700 | |

701 | /// Return a reference to the substring from [Start, End). |

702 | /// |

703 | /// \param Start The index of the starting character in the substring; if |

704 | /// the index is npos or greater than the length of the string then the |

705 | /// empty substring will be returned. |

706 | /// |

707 | /// \param End The index following the last character to include in the |

708 | /// substring. If this is npos or exceeds the number of characters |

709 | /// remaining in the string, the string suffix (starting with \p Start) |

710 | /// will be returned. If this is less than \p Start, an empty string will |

711 | /// be returned. |

712 | LLVM_NODISCARD |

713 | StringRef slice(size_t Start, size_t End) const { |

714 | Start = std::min(Start, Length); |

715 | End = std::min(std::max(Start, End), Length); |

716 | return StringRef(Data + Start, End - Start); |

717 | } |

718 | |

719 | /// Split into two substrings around the first occurrence of a separator |

720 | /// character. |

721 | /// |

722 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

723 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

724 | /// maximal. If \p Separator is not in the string, then the result is a |

725 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

726 | /// |

727 | /// \param Separator The character to split on. |

728 | /// \returns The split substrings. |

729 | LLVM_NODISCARD |

730 | std::pair<StringRef, StringRef> split(char Separator) const { |

731 | return split(StringRef(&Separator, 1)); |

732 | } |

733 | |

734 | /// Split into two substrings around the first occurrence of a separator |

735 | /// string. |

736 | /// |

737 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

738 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

739 | /// maximal. If \p Separator is not in the string, then the result is a |

740 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

741 | /// |

742 | /// \param Separator - The string to split on. |

743 | /// \return - The split substrings. |

744 | LLVM_NODISCARD |

745 | std::pair<StringRef, StringRef> split(StringRef Separator) const { |

746 | size_t Idx = find(Separator); |

747 | if (Idx == npos) |

748 | return std::make_pair(*this, StringRef()); |

749 | return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); |

750 | } |

751 | |

752 | /// Split into two substrings around the last occurrence of a separator |

753 | /// string. |

754 | /// |

755 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

756 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

757 | /// minimal. If \p Separator is not in the string, then the result is a |

758 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

759 | /// |

760 | /// \param Separator - The string to split on. |

761 | /// \return - The split substrings. |

762 | LLVM_NODISCARD |

763 | std::pair<StringRef, StringRef> rsplit(StringRef Separator) const { |

764 | size_t Idx = rfind(Separator); |

765 | if (Idx == npos) |

766 | return std::make_pair(*this, StringRef()); |

767 | return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); |

768 | } |

769 | |

/// Split into substrings around the occurrences of a separator string.
///
/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
/// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit.
/// A useful invariant is that
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
///
/// \param A - Where to put the substrings.
/// \param Separator - The string to split on.
/// \param MaxSplit - The maximum number of times the string is split; the
/// default of -1 means no limit.
/// \param KeepEmpty - True if empty substring should be added.
void split(SmallVectorImpl<StringRef> &A,
StringRef Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;

/// Split into substrings around the occurrences of a separator character.
///
/// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
/// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
/// elements are added to A.
/// If \p KeepEmpty is false, empty strings are not added to \p A. They
/// still count when considering \p MaxSplit.
/// A useful invariant is that
/// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
///
/// \param A - Where to put the substrings.
/// \param Separator - The character to split on.
/// \param MaxSplit - The maximum number of times the string is split; the
/// default of -1 means no limit.
/// \param KeepEmpty - True if empty substring should be added.
void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;

804 | |

805 | /// Split into two substrings around the last occurrence of a separator |

806 | /// character. |

807 | /// |

808 | /// If \p Separator is in the string, then the result is a pair (LHS, RHS) |

809 | /// such that (*this == LHS + Separator + RHS) is true and RHS is |

810 | /// minimal. If \p Separator is not in the string, then the result is a |

811 | /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). |

812 | /// |

813 | /// \param Separator - The character to split on. |

814 | /// \return - The split substrings. |

815 | LLVM_NODISCARD |

816 | std::pair<StringRef, StringRef> rsplit(char Separator) const { |

817 | return rsplit(StringRef(&Separator, 1)); |

818 | } |

819 | |

820 | /// Return string with consecutive \p Char characters starting from the |

821 | /// the left removed. |

822 | LLVM_NODISCARD |

823 | StringRef ltrim(char Char) const { |

824 | return drop_front(std::min(Length, find_first_not_of(Char))); |

825 | } |

826 | |

827 | /// Return string with consecutive characters in \p Chars starting from |

828 | /// the left removed. |

829 | LLVM_NODISCARD |

830 | StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { |

831 | return drop_front(std::min(Length, find_first_not_of(Chars))); |

832 | } |

833 | |

834 | /// Return string with consecutive \p Char characters starting from the |

835 | /// right removed. |

836 | LLVM_NODISCARD |

837 | StringRef rtrim(char Char) const { |

838 | return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); |

839 | } |

840 | |

841 | /// Return string with consecutive characters in \p Chars starting from |

842 | /// the right removed. |

843 | LLVM_NODISCARD |

844 | StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { |

845 | return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); |

846 | } |

847 | |

848 | /// Return string with consecutive \p Char characters starting from the |

849 | /// left and right removed. |

850 | LLVM_NODISCARD |

851 | StringRef trim(char Char) const { |

852 | return ltrim(Char).rtrim(Char); |

853 | } |

854 | |

855 | /// Return string with consecutive characters in \p Chars starting from |

856 | /// the left and right removed. |

857 | LLVM_NODISCARD |

858 | StringRef trim(StringRef Chars = " \t\n\v\f\r") const { |

859 | return ltrim(Chars).rtrim(Chars); |

860 | } |

861 | |

862 | /// @} |

863 | }; |

864 | |

865 | /// A wrapper around a string literal that serves as a proxy for constructing |

866 | /// global tables of StringRefs with the length computed at compile time. |

867 | /// In order to avoid the invocation of a global constructor, StringLiteral |

868 | /// should *only* be used in a constexpr context, as such: |

869 | /// |

870 | /// constexpr StringLiteral S("test"); |

871 | /// |

872 | class StringLiteral : public StringRef { |

873 | private: |

874 | constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) { |

875 | } |

876 | |

877 | public: |

878 | template <size_t N> |

879 | constexpr StringLiteral(const char (&Str)[N]) |

880 | #if defined(__clang__) && __has_attribute(enable_if) |

881 | #pragma clang diagnostic push |

882 | #pragma clang diagnostic ignored "-Wgcc-compat" |

883 | __attribute((enable_if(__builtin_strlen(Str) == N - 1, |

884 | "invalid string literal"))) |

885 | #pragma clang diagnostic pop |

886 | #endif |

887 | : StringRef(Str, N - 1) { |

888 | } |

889 | |

890 | // Explicit construction for strings like "foo\0bar". |

891 | template <size_t N> |

892 | static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) { |

893 | return StringLiteral(Str, N - 1); |

894 | } |

895 | }; |

896 | |

897 | /// @name StringRef Comparison Operators |

898 | /// @{ |

899 | |

900 | inline bool operator==(StringRef LHS, StringRef RHS) { |

901 | return LHS.equals(RHS); |

902 | } |

903 | |

904 | inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } |

905 | |

906 | inline bool operator<(StringRef LHS, StringRef RHS) { |

907 | return LHS.compare(RHS) == -1; |

908 | } |

909 | |

910 | inline bool operator<=(StringRef LHS, StringRef RHS) { |

911 | return LHS.compare(RHS) != 1; |

912 | } |

913 | |

914 | inline bool operator>(StringRef LHS, StringRef RHS) { |

915 | return LHS.compare(RHS) == 1; |

916 | } |

917 | |

918 | inline bool operator>=(StringRef LHS, StringRef RHS) { |

919 | return LHS.compare(RHS) != -1; |

920 | } |

921 | |

922 | inline std::string &operator+=(std::string &buffer, StringRef string) { |

923 | return buffer.append(string.data(), string.size()); |

924 | } |

925 | |

926 | /// @} |

927 | |

928 | /// Compute a hash_code for a StringRef. |

929 | LLVM_NODISCARD |

930 | hash_code hash_value(StringRef S); |

931 | |

932 | } // end namespace llvm |

933 | |

934 | #endif // LLVM_ADT_STRINGREF_H |

935 |