1 | //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | |

9 | #ifndef LLVM_ADT_TWINE_H |

10 | #define LLVM_ADT_TWINE_H |

11 | |

12 | #include "llvm/ADT/SmallVector.h" |

13 | #include "llvm/ADT/StringRef.h" |

14 | #include "llvm/Support/ErrorHandling.h" |

15 | #include <cassert> |

16 | #include <cstdint> |

17 | #include <string> |

18 | |

19 | namespace llvm { |

20 | |

21 | class formatv_object_base; |

22 | class raw_ostream; |

23 | |

24 | /// Twine - A lightweight data structure for efficiently representing the |

25 | /// concatenation of temporary values as strings. |

26 | /// |

27 | /// A Twine is a kind of rope, it represents a concatenated string using a |

28 | /// binary-tree, where the string is the preorder of the nodes. Since the |

29 | /// Twine can be efficiently rendered into a buffer when its result is used, |

30 | /// it avoids the cost of generating temporary values for intermediate string |

31 | /// results -- particularly in cases when the Twine result is never |

32 | /// required. By explicitly tracking the type of leaf nodes, we can also avoid |

33 | /// the creation of temporary strings for conversions operations (such as |

34 | /// appending an integer to a string). |

35 | /// |

36 | /// A Twine is not intended for use directly and should not be stored, its |

37 | /// implementation relies on the ability to store pointers to temporary stack |

38 | /// objects which may be deallocated at the end of a statement. Twines should |

39 | /// only be used accepted as const references in arguments, when an API wishes |

40 | /// to accept possibly-concatenated strings. |

41 | /// |

42 | /// Twines support a special 'null' value, which always concatenates to form |

43 | /// itself, and renders as an empty string. This can be returned from APIs to |

44 | /// effectively nullify any concatenations performed on the result. |

45 | /// |

46 | /// \b Implementation |

47 | /// |

48 | /// Given the nature of a Twine, it is not possible for the Twine's |

49 | /// concatenation method to construct interior nodes; the result must be |

50 | /// represented inside the returned value. For this reason a Twine object |

51 | /// actually holds two values, the left- and right-hand sides of a |

52 | /// concatenation. We also have nullary Twine objects, which are effectively |

53 | /// sentinel values that represent empty strings. |

54 | /// |

55 | /// Thus, a Twine can effectively have zero, one, or two children. The \see |

56 | /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for |

57 | /// testing the number of children. |

58 | /// |

59 | /// We maintain a number of invariants on Twine objects (FIXME: Why): |

60 | /// - Nullary twines are always represented with their Kind on the left-hand |

61 | /// side, and the Empty kind on the right-hand side. |

62 | /// - Unary twines are always represented with the value on the left-hand |

63 | /// side, and the Empty kind on the right-hand side. |

64 | /// - If a Twine has another Twine as a child, that child should always be |

65 | /// binary (otherwise it could have been folded into the parent). |

66 | /// |

67 | /// These invariants are check by \see isValid(). |

68 | /// |

69 | /// \b Efficiency Considerations |

70 | /// |

71 | /// The Twine is designed to yield efficient and small code for common |

72 | /// situations. For this reason, the concat() method is inlined so that |

73 | /// concatenations of leaf nodes can be optimized into stores directly into a |

74 | /// single stack allocated object. |

75 | /// |

76 | /// In practice, not all compilers can be trusted to optimize concat() fully, |

77 | /// so we provide two additional methods (and accompanying operator+ |

78 | /// overloads) to guarantee that particularly important cases (cstring plus |

79 | /// StringRef) codegen as desired. |

80 | class Twine { |

81 | /// NodeKind - Represent the type of an argument. |

82 | enum NodeKind : unsigned char { |

83 | /// An empty string; the result of concatenating anything with it is also |

84 | /// empty. |

85 | NullKind, |

86 | |

87 | /// The empty string. |

88 | EmptyKind, |

89 | |

90 | /// A pointer to a Twine instance. |

91 | TwineKind, |

92 | |

93 | /// A pointer to a C string instance. |

94 | CStringKind, |

95 | |

96 | /// A pointer to an std::string instance. |

97 | StdStringKind, |

98 | |

99 | /// A pointer to a StringRef instance. |

100 | StringRefKind, |

101 | |

102 | /// A pointer to a SmallString instance. |

103 | SmallStringKind, |

104 | |

105 | /// A pointer to a formatv_object_base instance. |

106 | FormatvObjectKind, |

107 | |

108 | /// A char value, to render as a character. |

109 | CharKind, |

110 | |

111 | /// An unsigned int value, to render as an unsigned decimal integer. |

112 | DecUIKind, |

113 | |

114 | /// An int value, to render as a signed decimal integer. |

115 | DecIKind, |

116 | |

117 | /// A pointer to an unsigned long value, to render as an unsigned decimal |

118 | /// integer. |

119 | DecULKind, |

120 | |

121 | /// A pointer to a long value, to render as a signed decimal integer. |

122 | DecLKind, |

123 | |

124 | /// A pointer to an unsigned long long value, to render as an unsigned |

125 | /// decimal integer. |

126 | DecULLKind, |

127 | |

128 | /// A pointer to a long long value, to render as a signed decimal integer. |

129 | DecLLKind, |

130 | |

131 | /// A pointer to a uint64_t value, to render as an unsigned hexadecimal |

132 | /// integer. |

133 | UHexKind |

134 | }; |

135 | |

136 | union Child |

137 | { |

138 | const Twine *twine; |

139 | const char *cString; |

140 | const std::string *stdString; |

141 | const StringRef *stringRef; |

142 | const SmallVectorImpl<char> *smallString; |

143 | const formatv_object_base *formatvObject; |

144 | char character; |

145 | unsigned int decUI; |

146 | int decI; |

147 | const unsigned long *decUL; |

148 | const long *decL; |

149 | const unsigned long long *decULL; |

150 | const long long *decLL; |

151 | const uint64_t *uHex; |

152 | }; |

153 | |

154 | /// LHS - The prefix in the concatenation, which may be uninitialized for |

155 | /// Null or Empty kinds. |

156 | Child LHS; |

157 | |

158 | /// RHS - The suffix in the concatenation, which may be uninitialized for |

159 | /// Null or Empty kinds. |

160 | Child RHS; |

161 | |

162 | /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). |

163 | NodeKind LHSKind = EmptyKind; |

164 | |

165 | /// RHSKind - The NodeKind of the right hand side, \see getRHSKind(). |

166 | NodeKind RHSKind = EmptyKind; |

167 | |

168 | /// Construct a nullary twine; the kind must be NullKind or EmptyKind. |

169 | explicit Twine(NodeKind Kind) : LHSKind(Kind) { |

170 | assert(isNullary() && "Invalid kind!"); |

171 | } |

172 | |

173 | /// Construct a binary twine. |

174 | explicit Twine(const Twine &LHS, const Twine &RHS) |

175 | : LHSKind(TwineKind), RHSKind(TwineKind) { |

176 | this->LHS.twine = &LHS; |

177 | this->RHS.twine = &RHS; |

178 | assert(isValid() && "Invalid twine!"); |

179 | } |

180 | |

181 | /// Construct a twine from explicit values. |

182 | explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind) |

183 | : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) { |

184 | assert(isValid() && "Invalid twine!"); |

185 | } |

186 | |

187 | /// Check for the null twine. |

188 | bool isNull() const { |

189 | return getLHSKind() == NullKind; |

190 | } |

191 | |

192 | /// Check for the empty twine. |

193 | bool isEmpty() const { |

194 | return getLHSKind() == EmptyKind; |

195 | } |

196 | |

197 | /// Check if this is a nullary twine (null or empty). |

198 | bool isNullary() const { |

199 | return isNull() || isEmpty(); |

200 | } |

201 | |

202 | /// Check if this is a unary twine. |

203 | bool isUnary() const { |

204 | return getRHSKind() == EmptyKind && !isNullary(); |

205 | } |

206 | |

207 | /// Check if this is a binary twine. |

208 | bool isBinary() const { |

209 | return getLHSKind() != NullKind && getRHSKind() != EmptyKind; |

210 | } |

211 | |

212 | /// Check if this is a valid twine (satisfying the invariants on |

213 | /// order and number of arguments). |

214 | bool isValid() const { |

215 | // Nullary twines always have Empty on the RHS. |

216 | if (isNullary() && getRHSKind() != EmptyKind) |

217 | return false; |

218 | |

219 | // Null should never appear on the RHS. |

220 | if (getRHSKind() == NullKind) |

221 | return false; |

222 | |

223 | // The RHS cannot be non-empty if the LHS is empty. |

224 | if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) |

225 | return false; |

226 | |

227 | // A twine child should always be binary. |

228 | if (getLHSKind() == TwineKind && |

229 | !LHS.twine->isBinary()) |

230 | return false; |

231 | if (getRHSKind() == TwineKind && |

232 | !RHS.twine->isBinary()) |

233 | return false; |

234 | |

235 | return true; |

236 | } |

237 | |

238 | /// Get the NodeKind of the left-hand side. |

239 | NodeKind getLHSKind() const { return LHSKind; } |

240 | |

241 | /// Get the NodeKind of the right-hand side. |

242 | NodeKind getRHSKind() const { return RHSKind; } |

243 | |

244 | /// Print one child from a twine. |

245 | void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const; |

246 | |

247 | /// Print the representation of one child from a twine. |

248 | void printOneChildRepr(raw_ostream &OS, Child Ptr, |

249 | NodeKind Kind) const; |

250 | |

251 | public: |

252 | /// @name Constructors |

253 | /// @{ |

254 | |

255 | /// Construct from an empty string. |

256 | /*implicit*/ Twine() { |

257 | assert(isValid() && "Invalid twine!"); |

258 | } |

259 | |

260 | Twine(const Twine &) = default; |

261 | |

262 | /// Construct from a C string. |

263 | /// |

264 | /// We take care here to optimize "" into the empty twine -- this will be |

265 | /// optimized out for string constants. This allows Twine arguments have |

266 | /// default "" values, without introducing unnecessary string constants. |

267 | /*implicit*/ Twine(const char *Str) { |

268 | if (Str[0] != '\0') { |

269 | LHS.cString = Str; |

270 | LHSKind = CStringKind; |

271 | } else |

272 | LHSKind = EmptyKind; |

273 | |

274 | assert(isValid() && "Invalid twine!"); |

275 | } |

276 | /// Delete the implicit conversion from nullptr as Twine(const char *) |

277 | /// cannot take nullptr. |

278 | /*implicit*/ Twine(std::nullptr_t) = delete; |

279 | |

280 | /// Construct from an std::string. |

281 | /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) { |

282 | LHS.stdString = &Str; |

283 | assert(isValid() && "Invalid twine!"); |

284 | } |

285 | |

286 | /// Construct from a StringRef. |

287 | /*implicit*/ Twine(const StringRef &Str) : LHSKind(StringRefKind) { |

288 | LHS.stringRef = &Str; |

289 | assert(isValid() && "Invalid twine!"); |

290 | } |

291 | |

292 | /// Construct from a SmallString. |

293 | /*implicit*/ Twine(const SmallVectorImpl<char> &Str) |

294 | : LHSKind(SmallStringKind) { |

295 | LHS.smallString = &Str; |

296 | assert(isValid() && "Invalid twine!"); |

297 | } |

298 | |

299 | /// Construct from a formatv_object_base. |

300 | /*implicit*/ Twine(const formatv_object_base &Fmt) |

301 | : LHSKind(FormatvObjectKind) { |

302 | LHS.formatvObject = &Fmt; |

303 | assert(isValid() && "Invalid twine!"); |

304 | } |

305 | |

306 | /// Construct from a char. |

307 | explicit Twine(char Val) : LHSKind(CharKind) { |

308 | LHS.character = Val; |

309 | } |

310 | |

311 | /// Construct from a signed char. |

312 | explicit Twine(signed char Val) : LHSKind(CharKind) { |

313 | LHS.character = static_cast<char>(Val); |

314 | } |

315 | |

316 | /// Construct from an unsigned char. |

317 | explicit Twine(unsigned char Val) : LHSKind(CharKind) { |

318 | LHS.character = static_cast<char>(Val); |

319 | } |

320 | |

321 | /// Construct a twine to print \p Val as an unsigned decimal integer. |

322 | explicit Twine(unsigned Val) : LHSKind(DecUIKind) { |

323 | LHS.decUI = Val; |

324 | } |

325 | |

326 | /// Construct a twine to print \p Val as a signed decimal integer. |

327 | explicit Twine(int Val) : LHSKind(DecIKind) { |

328 | LHS.decI = Val; |

329 | } |

330 | |

331 | /// Construct a twine to print \p Val as an unsigned decimal integer. |

332 | explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) { |

333 | LHS.decUL = &Val; |

334 | } |

335 | |

336 | /// Construct a twine to print \p Val as a signed decimal integer. |

337 | explicit Twine(const long &Val) : LHSKind(DecLKind) { |

338 | LHS.decL = &Val; |

339 | } |

340 | |

341 | /// Construct a twine to print \p Val as an unsigned decimal integer. |

342 | explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) { |

343 | LHS.decULL = &Val; |

344 | } |

345 | |

346 | /// Construct a twine to print \p Val as a signed decimal integer. |

347 | explicit Twine(const long long &Val) : LHSKind(DecLLKind) { |

348 | LHS.decLL = &Val; |

349 | } |

350 | |

351 | // FIXME: Unfortunately, to make sure this is as efficient as possible we |

352 | // need extra binary constructors from particular types. We can't rely on |

353 | // the compiler to be smart enough to fold operator+()/concat() down to the |

354 | // right thing. Yet. |

355 | |

356 | /// Construct as the concatenation of a C string and a StringRef. |

357 | /*implicit*/ Twine(const char *LHS, const StringRef &RHS) |

358 | : LHSKind(CStringKind), RHSKind(StringRefKind) { |

359 | this->LHS.cString = LHS; |

360 | this->RHS.stringRef = &RHS; |

361 | assert(isValid() && "Invalid twine!"); |

362 | } |

363 | |

364 | /// Construct as the concatenation of a StringRef and a C string. |

365 | /*implicit*/ Twine(const StringRef &LHS, const char *RHS) |

366 | : LHSKind(StringRefKind), RHSKind(CStringKind) { |

367 | this->LHS.stringRef = &LHS; |

368 | this->RHS.cString = RHS; |

369 | assert(isValid() && "Invalid twine!"); |

370 | } |

371 | |

372 | /// Since the intended use of twines is as temporary objects, assignments |

373 | /// when concatenating might cause undefined behavior or stack corruptions |

374 | Twine &operator=(const Twine &) = delete; |

375 | |

376 | /// Create a 'null' string, which is an empty string that always |

377 | /// concatenates to form another empty string. |

378 | static Twine createNull() { |

379 | return Twine(NullKind); |

380 | } |

381 | |

382 | /// @} |

383 | /// @name Numeric Conversions |

384 | /// @{ |

385 | |

386 | // Construct a twine to print \p Val as an unsigned hexadecimal integer. |

387 | static Twine utohexstr(const uint64_t &Val) { |

388 | Child LHS, RHS; |

389 | LHS.uHex = &Val; |

390 | RHS.twine = nullptr; |

391 | return Twine(LHS, UHexKind, RHS, EmptyKind); |

392 | } |

393 | |

394 | /// @} |

395 | /// @name Predicate Operations |

396 | /// @{ |

397 | |

398 | /// Check if this twine is trivially empty; a false return value does not |

399 | /// necessarily mean the twine is empty. |

400 | bool isTriviallyEmpty() const { |

401 | return isNullary(); |

402 | } |

403 | |

404 | /// Return true if this twine can be dynamically accessed as a single |

405 | /// StringRef value with getSingleStringRef(). |

406 | bool isSingleStringRef() const { |

407 | if (getRHSKind() != EmptyKind) return false; |

408 | |

409 | switch (getLHSKind()) { |

410 | case EmptyKind: |

411 | case CStringKind: |

412 | case StdStringKind: |

413 | case StringRefKind: |

414 | case SmallStringKind: |

415 | return true; |

416 | default: |

417 | return false; |

418 | } |

419 | } |

420 | |

421 | /// @} |

422 | /// @name String Operations |

423 | /// @{ |

424 | |

425 | Twine concat(const Twine &Suffix) const; |

426 | |

427 | /// @} |

428 | /// @name Output & Conversion. |

429 | /// @{ |

430 | |

431 | /// Return the twine contents as a std::string. |

432 | std::string str() const; |

433 | |

434 | /// Append the concatenated string into the given SmallString or SmallVector. |

435 | void toVector(SmallVectorImpl<char> &Out) const; |

436 | |

437 | /// This returns the twine as a single StringRef. This method is only valid |

438 | /// if isSingleStringRef() is true. |

439 | StringRef getSingleStringRef() const { |

440 | assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); |

441 | switch (getLHSKind()) { |

442 | default: llvm_unreachable("Out of sync with isSingleStringRef"); |

443 | case EmptyKind: return StringRef(); |

444 | case CStringKind: return StringRef(LHS.cString); |

445 | case StdStringKind: return StringRef(*LHS.stdString); |

446 | case StringRefKind: return *LHS.stringRef; |

447 | case SmallStringKind: |

448 | return StringRef(LHS.smallString->data(), LHS.smallString->size()); |

449 | } |

450 | } |

451 | |

452 | /// This returns the twine as a single StringRef if it can be |

453 | /// represented as such. Otherwise the twine is written into the given |

454 | /// SmallVector and a StringRef to the SmallVector's data is returned. |

455 | StringRef toStringRef(SmallVectorImpl<char> &Out) const { |

456 | if (isSingleStringRef()) |

457 | return getSingleStringRef(); |

458 | toVector(Out); |

459 | return StringRef(Out.data(), Out.size()); |

460 | } |

461 | |

462 | /// This returns the twine as a single null terminated StringRef if it |

463 | /// can be represented as such. Otherwise the twine is written into the |

464 | /// given SmallVector and a StringRef to the SmallVector's data is returned. |

465 | /// |

466 | /// The returned StringRef's size does not include the null terminator. |

467 | StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const; |

468 | |

469 | /// Write the concatenated string represented by this twine to the |

470 | /// stream \p OS. |

471 | void print(raw_ostream &OS) const; |

472 | |

473 | /// Dump the concatenated string represented by this twine to stderr. |

474 | void dump() const; |

475 | |

476 | /// Write the representation of this twine to the stream \p OS. |

477 | void printRepr(raw_ostream &OS) const; |

478 | |

479 | /// Dump the representation of this twine to stderr. |

480 | void dumpRepr() const; |

481 | |

482 | /// @} |

483 | }; |

484 | |

485 | /// @name Twine Inline Implementations |

486 | /// @{ |

487 | |

488 | inline Twine Twine::concat(const Twine &Suffix) const { |

489 | // Concatenation with null is null. |

490 | if (isNull() || Suffix.isNull()) |

491 | return Twine(NullKind); |

492 | |

493 | // Concatenation with empty yields the other side. |

494 | if (isEmpty()) |

495 | return Suffix; |

496 | if (Suffix.isEmpty()) |

497 | return *this; |

498 | |

499 | // Otherwise we need to create a new node, taking care to fold in unary |

500 | // twines. |

501 | Child NewLHS, NewRHS; |

502 | NewLHS.twine = this; |

503 | NewRHS.twine = &Suffix; |

504 | NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; |

505 | if (isUnary()) { |

506 | NewLHS = LHS; |

507 | NewLHSKind = getLHSKind(); |

508 | } |

509 | if (Suffix.isUnary()) { |

510 | NewRHS = Suffix.LHS; |

511 | NewRHSKind = Suffix.getLHSKind(); |

512 | } |

513 | |

514 | return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); |

515 | } |

516 | |

517 | inline Twine operator+(const Twine &LHS, const Twine &RHS) { |

518 | return LHS.concat(RHS); |

519 | } |

520 | |

521 | /// Additional overload to guarantee simplified codegen; this is equivalent to |

522 | /// concat(). |

523 | |

524 | inline Twine operator+(const char *LHS, const StringRef &RHS) { |

525 | return Twine(LHS, RHS); |

526 | } |

527 | |

528 | /// Additional overload to guarantee simplified codegen; this is equivalent to |

529 | /// concat(). |

530 | |

531 | inline Twine operator+(const StringRef &LHS, const char *RHS) { |

532 | return Twine(LHS, RHS); |

533 | } |

534 | |

535 | inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { |

536 | RHS.print(OS); |

537 | return OS; |

538 | } |

539 | |

540 | /// @} |

541 | |

542 | } // end namespace llvm |

543 | |

544 | #endif // LLVM_ADT_TWINE_H |

545 |