1//===--- FormatStringConverter.cpp - clang-tidy----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Implementation of the FormatStringConverter class which is used to convert
11/// printf format strings to C++ std::formatter format strings.
12///
13//===----------------------------------------------------------------------===//
14
15#include "FormatStringConverter.h"
16#include "../utils/FixItHintUtils.h"
17#include "clang/AST/Expr.h"
18#include "clang/ASTMatchers/ASTMatchFinder.h"
19#include "clang/Basic/LangOptions.h"
20#include "clang/Lex/Lexer.h"
21#include "clang/Tooling/FixIt.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Support/Debug.h"
24
25using namespace clang::ast_matchers;
26using namespace clang::analyze_printf;
27
28namespace clang::tidy::utils {
29using clang::analyze_format_string::ConversionSpecifier;
30
31/// Is the passed type the actual "char" type, whether that be signed or
32/// unsigned, rather than explicit signed char or unsigned char types.
33static bool isRealCharType(const clang::QualType &Ty) {
34 using namespace clang;
35 const Type *DesugaredType = Ty->getUnqualifiedDesugaredType();
36 if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: DesugaredType))
37 return (BT->getKind() == BuiltinType::Char_U ||
38 BT->getKind() == BuiltinType::Char_S);
39 return false;
40}
41
42/// If possible, return the text name of the signed type that corresponds to the
43/// passed integer type. If the passed type is already signed then its name is
44/// just returned. Only supports BuiltinTypes.
45static std::optional<std::string>
46getCorrespondingSignedTypeName(const clang::QualType &QT) {
47 using namespace clang;
48 const auto UQT = QT.getUnqualifiedType();
49 if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) {
50 switch (BT->getKind()) {
51 case BuiltinType::UChar:
52 case BuiltinType::Char_U:
53 case BuiltinType::SChar:
54 case BuiltinType::Char_S:
55 return "signed char";
56 case BuiltinType::UShort:
57 case BuiltinType::Short:
58 return "short";
59 case BuiltinType::UInt:
60 case BuiltinType::Int:
61 return "int";
62 case BuiltinType::ULong:
63 case BuiltinType::Long:
64 return "long";
65 case BuiltinType::ULongLong:
66 case BuiltinType::LongLong:
67 return "long long";
68 default:
69 llvm::dbgs() << "Unknown corresponding signed type for BuiltinType '"
70 << QT.getAsString() << "'\n";
71 return std::nullopt;
72 }
73 }
74
75 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
76 // if the argument type does.
77 const std::string TypeName = UQT.getAsString();
78 StringRef SimplifiedTypeName{TypeName};
79 const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::");
80 const StringRef Prefix = InStd ? "std::" : "";
81
82 if (SimplifiedTypeName.starts_with(Prefix: "uint") &&
83 SimplifiedTypeName.ends_with(Suffix: "_t"))
84 return (Twine(Prefix) + SimplifiedTypeName.drop_front()).str();
85
86 if (SimplifiedTypeName == "size_t")
87 return (Twine(Prefix) + "ssize_t").str();
88
89 llvm::dbgs() << "Unknown corresponding signed type for non-BuiltinType '"
90 << UQT.getAsString() << "'\n";
91 return std::nullopt;
92}
93
94/// If possible, return the text name of the unsigned type that corresponds to
95/// the passed integer type. If the passed type is already unsigned then its
96/// name is just returned. Only supports BuiltinTypes.
97static std::optional<std::string>
98getCorrespondingUnsignedTypeName(const clang::QualType &QT) {
99 using namespace clang;
100 const auto UQT = QT.getUnqualifiedType();
101 if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: UQT)) {
102 switch (BT->getKind()) {
103 case BuiltinType::SChar:
104 case BuiltinType::Char_S:
105 case BuiltinType::UChar:
106 case BuiltinType::Char_U:
107 return "unsigned char";
108 case BuiltinType::Short:
109 case BuiltinType::UShort:
110 return "unsigned short";
111 case BuiltinType::Int:
112 case BuiltinType::UInt:
113 return "unsigned int";
114 case BuiltinType::Long:
115 case BuiltinType::ULong:
116 return "unsigned long";
117 case BuiltinType::LongLong:
118 case BuiltinType::ULongLong:
119 return "unsigned long long";
120 default:
121 llvm::dbgs() << "Unknown corresponding unsigned type for BuiltinType '"
122 << UQT.getAsString() << "'\n";
123 return std::nullopt;
124 }
125 }
126
127 // Deal with fixed-width integer types from <cstdint>. Use std:: prefix only
128 // if the argument type does.
129 const std::string TypeName = UQT.getAsString();
130 StringRef SimplifiedTypeName{TypeName};
131 const bool InStd = SimplifiedTypeName.consume_front(Prefix: "std::");
132 const StringRef Prefix = InStd ? "std::" : "";
133
134 if (SimplifiedTypeName.starts_with(Prefix: "int") &&
135 SimplifiedTypeName.ends_with(Suffix: "_t"))
136 return (Twine(Prefix) + "u" + SimplifiedTypeName).str();
137
138 if (SimplifiedTypeName == "ssize_t")
139 return (Twine(Prefix) + "size_t").str();
140 if (SimplifiedTypeName == "ptrdiff_t")
141 return (Twine(Prefix) + "size_t").str();
142
143 llvm::dbgs() << "Unknown corresponding unsigned type for non-BuiltinType '"
144 << UQT.getAsString() << "'\n";
145 return std::nullopt;
146}
147
148static std::optional<std::string>
149castTypeForArgument(ConversionSpecifier::Kind ArgKind,
150 const clang::QualType &QT) {
151 if (ArgKind == ConversionSpecifier::Kind::uArg)
152 return getCorrespondingUnsignedTypeName(QT);
153 return getCorrespondingSignedTypeName(QT);
154}
155
156static bool isMatchingSignedness(ConversionSpecifier::Kind ArgKind,
157 const clang::QualType &ArgType) {
158 if (const auto *BT = llvm::dyn_cast<BuiltinType>(Val: ArgType)) {
159 // Unadorned char never matches any expected signedness since it
160 // could be signed or unsigned.
161 const auto ArgTypeKind = BT->getKind();
162 if (ArgTypeKind == BuiltinType::Char_U ||
163 ArgTypeKind == BuiltinType::Char_S)
164 return false;
165 }
166
167 if (ArgKind == ConversionSpecifier::Kind::uArg)
168 return ArgType->isUnsignedIntegerType();
169 return ArgType->isSignedIntegerType();
170}
171
172namespace {
173AST_MATCHER(clang::QualType, isRealChar) {
174 return clang::tidy::utils::isRealCharType(Ty: Node);
175}
176} // namespace
177
178static bool castMismatchedIntegerTypes(const CallExpr *Call, bool StrictMode) {
179 /// For printf-style functions, the signedness of the type printed is
180 /// indicated by the corresponding type in the format string.
181 /// std::print will determine the signedness from the type of the
182 /// argument. This means that it is necessary to generate a cast in
183 /// StrictMode to ensure that the exact behaviour is maintained.
184 /// However, for templated functions like absl::PrintF and
185 /// fmt::printf, the signedness of the type printed is also taken from
186 /// the actual argument like std::print, so such casts are never
187 /// necessary. printf-style functions are variadic, whereas templated
188 /// ones aren't, so we can use that to distinguish between the two
189 /// cases.
190 if (StrictMode) {
191 const FunctionDecl *FuncDecl = Call->getDirectCallee();
192 assert(FuncDecl);
193 return FuncDecl->isVariadic();
194 }
195 return false;
196}
197
198FormatStringConverter::FormatStringConverter(ASTContext *ContextIn,
199 const CallExpr *Call,
200 unsigned FormatArgOffset,
201 bool StrictMode,
202 const LangOptions &LO)
203 : Context(ContextIn),
204 CastMismatchedIntegerTypes(castMismatchedIntegerTypes(Call, StrictMode)),
205 Args(Call->getArgs()), NumArgs(Call->getNumArgs()),
206 ArgsOffset(FormatArgOffset + 1), LangOpts(LO) {
207 assert(ArgsOffset <= NumArgs);
208 FormatExpr = llvm::dyn_cast<StringLiteral>(
209 Val: Args[FormatArgOffset]->IgnoreImplicitAsWritten());
210 assert(FormatExpr);
211 if (!FormatExpr->isOrdinary())
212 return; // No wide string support yet
213 PrintfFormatString = FormatExpr->getString();
214
215 // Assume that the output will be approximately the same size as the input,
216 // but perhaps with a few escapes expanded.
217 const size_t EstimatedGrowth = 8;
218 StandardFormatString.reserve(res: PrintfFormatString.size() + EstimatedGrowth);
219 StandardFormatString.push_back(c: '\"');
220
221 const bool IsFreeBsdkPrintf = false;
222
223 using clang::analyze_format_string::ParsePrintfString;
224 ParsePrintfString(H&: *this, beg: PrintfFormatString.data(),
225 end: PrintfFormatString.data() + PrintfFormatString.size(),
226 LO: LangOpts, Target: Context->getTargetInfo(), isFreeBSDKPrintf: IsFreeBsdkPrintf);
227 finalizeFormatText();
228}
229
230void FormatStringConverter::emitAlignment(const PrintfSpecifier &FS,
231 std::string &FormatSpec) {
232 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
233
234 // We only care about alignment if a field width is specified
235 if (FS.getFieldWidth().getHowSpecified() != OptionalAmount::NotSpecified) {
236 if (ArgKind == ConversionSpecifier::sArg) {
237 // Strings are left-aligned by default with std::format, so we only
238 // need to emit an alignment if this one needs to be right aligned.
239 if (!FS.isLeftJustified())
240 FormatSpec.push_back(c: '>');
241 } else {
242 // Numbers are right-aligned by default with std::format, so we only
243 // need to emit an alignment if this one needs to be left aligned.
244 if (FS.isLeftJustified())
245 FormatSpec.push_back(c: '<');
246 }
247 }
248}
249
250void FormatStringConverter::emitSign(const PrintfSpecifier &FS,
251 std::string &FormatSpec) {
252 const ConversionSpecifier Spec = FS.getConversionSpecifier();
253
254 // Ignore on something that isn't numeric. For printf it's would be a
255 // compile-time warning but ignored at runtime, but for std::format it
256 // ought to be a compile-time error.
257 if (Spec.isAnyIntArg() || Spec.isDoubleArg()) {
258 // + is preferred to ' '
259 if (FS.hasPlusPrefix())
260 FormatSpec.push_back(c: '+');
261 else if (FS.hasSpacePrefix())
262 FormatSpec.push_back(c: ' ');
263 }
264}
265
266void FormatStringConverter::emitAlternativeForm(const PrintfSpecifier &FS,
267 std::string &FormatSpec) {
268 if (FS.hasAlternativeForm()) {
269 switch (FS.getConversionSpecifier().getKind()) {
270 case ConversionSpecifier::Kind::aArg:
271 case ConversionSpecifier::Kind::AArg:
272 case ConversionSpecifier::Kind::eArg:
273 case ConversionSpecifier::Kind::EArg:
274 case ConversionSpecifier::Kind::fArg:
275 case ConversionSpecifier::Kind::FArg:
276 case ConversionSpecifier::Kind::gArg:
277 case ConversionSpecifier::Kind::GArg:
278 case ConversionSpecifier::Kind::xArg:
279 case ConversionSpecifier::Kind::XArg:
280 case ConversionSpecifier::Kind::oArg:
281 FormatSpec.push_back(c: '#');
282 break;
283 default:
284 // Alternative forms don't exist for other argument kinds
285 break;
286 }
287 }
288}
289
290void FormatStringConverter::emitFieldWidth(const PrintfSpecifier &FS,
291 std::string &FormatSpec) {
292 {
293 const OptionalAmount FieldWidth = FS.getFieldWidth();
294 switch (FieldWidth.getHowSpecified()) {
295 case OptionalAmount::NotSpecified:
296 break;
297 case OptionalAmount::Constant:
298 FormatSpec.append(str: llvm::utostr(X: FieldWidth.getConstantAmount()));
299 break;
300 case OptionalAmount::Arg:
301 FormatSpec.push_back(c: '{');
302 if (FieldWidth.usesPositionalArg()) {
303 // std::format argument identifiers are zero-based, whereas printf
304 // ones are one based.
305 assert(FieldWidth.getPositionalArgIndex() > 0U);
306 FormatSpec.append(str: llvm::utostr(X: FieldWidth.getPositionalArgIndex() - 1));
307 }
308 FormatSpec.push_back(c: '}');
309 break;
310 case OptionalAmount::Invalid:
311 break;
312 }
313 }
314}
315
316void FormatStringConverter::emitPrecision(const PrintfSpecifier &FS,
317 std::string &FormatSpec) {
318 const OptionalAmount FieldPrecision = FS.getPrecision();
319 switch (FieldPrecision.getHowSpecified()) {
320 case OptionalAmount::NotSpecified:
321 break;
322 case OptionalAmount::Constant:
323 FormatSpec.push_back(c: '.');
324 FormatSpec.append(str: llvm::utostr(X: FieldPrecision.getConstantAmount()));
325 break;
326 case OptionalAmount::Arg:
327 FormatSpec.push_back(c: '.');
328 FormatSpec.push_back(c: '{');
329 if (FieldPrecision.usesPositionalArg()) {
330 // std::format argument identifiers are zero-based, whereas printf
331 // ones are one based.
332 assert(FieldPrecision.getPositionalArgIndex() > 0U);
333 FormatSpec.append(
334 str: llvm::utostr(X: FieldPrecision.getPositionalArgIndex() - 1));
335 }
336 FormatSpec.push_back(c: '}');
337 break;
338 case OptionalAmount::Invalid:
339 break;
340 }
341}
342
343void FormatStringConverter::maybeRotateArguments(const PrintfSpecifier &FS) {
344 unsigned ArgCount = 0;
345 const OptionalAmount FieldWidth = FS.getFieldWidth();
346 const OptionalAmount FieldPrecision = FS.getPrecision();
347
348 if (FieldWidth.getHowSpecified() == OptionalAmount::Arg &&
349 !FieldWidth.usesPositionalArg())
350 ++ArgCount;
351 if (FieldPrecision.getHowSpecified() == OptionalAmount::Arg &&
352 !FieldPrecision.usesPositionalArg())
353 ++ArgCount;
354
355 if (ArgCount)
356 ArgRotates.emplace_back(args: FS.getArgIndex() + ArgsOffset, args&: ArgCount);
357}
358
359void FormatStringConverter::emitStringArgument(unsigned ArgIndex,
360 const Expr *Arg) {
361 // If the argument is the result of a call to std::string::c_str() or
362 // data() with a return type of char then we can remove that call and
363 // pass the std::string directly. We don't want to do so if the return
364 // type is not a char pointer (though it's unlikely that such code would
365 // compile without warnings anyway.) See RedundantStringCStrCheck.
366
367 if (!StringCStrCallExprMatcher) {
368 // Lazily create the matcher
369 const auto StringDecl = type(hasUnqualifiedDesugaredType(InnerMatcher: recordType(
370 hasDeclaration(InnerMatcher: cxxRecordDecl(hasName(Name: "::std::basic_string"))))));
371 const auto StringExpr = expr(
372 anyOf(hasType(InnerMatcher: StringDecl), hasType(InnerMatcher: qualType(pointsTo(InnerMatcher: StringDecl)))));
373
374 StringCStrCallExprMatcher =
375 cxxMemberCallExpr(
376 on(InnerMatcher: StringExpr.bind(ID: "arg")), callee(InnerMatcher: memberExpr().bind(ID: "member")),
377 callee(InnerMatcher: cxxMethodDecl(hasAnyName("c_str", "data"),
378 returns(InnerMatcher: pointerType(pointee(isRealChar()))))))
379 .bind(ID: "call");
380 }
381
382 auto CStrMatches = match(Matcher: *StringCStrCallExprMatcher, Node: *Arg, Context&: *Context);
383 if (CStrMatches.size() == 1)
384 ArgCStrRemovals.push_back(x: CStrMatches.front());
385 else if (Arg->getType()->isPointerType()) {
386 const QualType Pointee = Arg->getType()->getPointeeType();
387 // printf is happy to print signed char and unsigned char strings, but
388 // std::format only likes char strings.
389 if (Pointee->isCharType() && !isRealCharType(Ty: Pointee))
390 ArgFixes.emplace_back(args&: ArgIndex, args: "reinterpret_cast<const char *>(");
391 }
392}
393
394bool FormatStringConverter::emitIntegerArgument(
395 ConversionSpecifier::Kind ArgKind, const Expr *Arg, unsigned ArgIndex,
396 std::string &FormatSpec) {
397 const clang::QualType &ArgType = Arg->getType();
398 if (ArgType->isBooleanType()) {
399 // std::format will print bool as either "true" or "false" by default,
400 // but printf prints them as "0" or "1". Be compatible with printf by
401 // requesting decimal output.
402 FormatSpec.push_back(c: 'd');
403 } else if (ArgType->isEnumeralType()) {
404 // std::format will try to find a specialization to print the enum
405 // (and probably fail), whereas printf would have just expected it to
406 // be passed as its underlying type. However, printf will have forced
407 // the signedness based on the format string, so we need to do the
408 // same.
409 if (const auto *ET = ArgType->getAs<EnumType>()) {
410 if (const std::optional<std::string> MaybeCastType =
411 castTypeForArgument(ArgKind, QT: ET->getDecl()->getIntegerType()))
412 ArgFixes.emplace_back(
413 args&: ArgIndex, args: (Twine("static_cast<") + *MaybeCastType + ">(").str());
414 else
415 return conversionNotPossible(
416 Reason: (Twine("argument ") + Twine(ArgIndex) + " has unexpected enum type")
417 .str());
418 }
419 } else if (CastMismatchedIntegerTypes &&
420 !isMatchingSignedness(ArgKind, ArgType)) {
421 // printf will happily print an unsigned type as signed if told to.
422 // Even -Wformat doesn't warn for this. std::format will format as
423 // unsigned unless we cast it.
424 if (const std::optional<std::string> MaybeCastType =
425 castTypeForArgument(ArgKind, QT: ArgType))
426 ArgFixes.emplace_back(
427 args&: ArgIndex, args: (Twine("static_cast<") + *MaybeCastType + ">(").str());
428 else
429 return conversionNotPossible(
430 Reason: (Twine("argument ") + Twine(ArgIndex) + " cannot be cast to " +
431 Twine(ArgKind == ConversionSpecifier::Kind::uArg ? "unsigned"
432 : "signed") +
433 " integer type to match format"
434 " specifier and StrictMode is enabled")
435 .str());
436 } else if (isRealCharType(Ty: ArgType) || !ArgType->isIntegerType()) {
437 // Only specify integer if the argument is of a different type
438 FormatSpec.push_back(c: 'd');
439 }
440 return true;
441}
442
443/// Append the corresponding standard format string type fragment to FormatSpec,
444/// and store any argument fixes for later application.
445/// @returns true on success, false on failure
446bool FormatStringConverter::emitType(const PrintfSpecifier &FS, const Expr *Arg,
447 std::string &FormatSpec) {
448 ConversionSpecifier::Kind ArgKind = FS.getConversionSpecifier().getKind();
449 switch (ArgKind) {
450 case ConversionSpecifier::Kind::sArg:
451 emitStringArgument(ArgIndex: FS.getArgIndex() + ArgsOffset, Arg);
452 break;
453 case ConversionSpecifier::Kind::cArg:
454 // The type must be "c" to get a character unless the type is exactly
455 // char (whether that be signed or unsigned for the target.)
456 if (!isRealCharType(Ty: Arg->getType()))
457 FormatSpec.push_back(c: 'c');
458 break;
459 case ConversionSpecifier::Kind::dArg:
460 case ConversionSpecifier::Kind::iArg:
461 case ConversionSpecifier::Kind::uArg:
462 if (!emitIntegerArgument(ArgKind, Arg, ArgIndex: FS.getArgIndex() + ArgsOffset,
463 FormatSpec))
464 return false;
465 break;
466 case ConversionSpecifier::Kind::pArg: {
467 const clang::QualType &ArgType = Arg->getType();
468 // std::format knows how to format void pointers and nullptrs
469 if (!ArgType->isNullPtrType() && !ArgType->isVoidPointerType())
470 ArgFixes.emplace_back(args: FS.getArgIndex() + ArgsOffset,
471 args: "static_cast<const void *>(");
472 break;
473 }
474 case ConversionSpecifier::Kind::xArg:
475 FormatSpec.push_back(c: 'x');
476 break;
477 case ConversionSpecifier::Kind::XArg:
478 FormatSpec.push_back(c: 'X');
479 break;
480 case ConversionSpecifier::Kind::oArg:
481 FormatSpec.push_back(c: 'o');
482 break;
483 case ConversionSpecifier::Kind::aArg:
484 FormatSpec.push_back(c: 'a');
485 break;
486 case ConversionSpecifier::Kind::AArg:
487 FormatSpec.push_back(c: 'A');
488 break;
489 case ConversionSpecifier::Kind::eArg:
490 FormatSpec.push_back(c: 'e');
491 break;
492 case ConversionSpecifier::Kind::EArg:
493 FormatSpec.push_back(c: 'E');
494 break;
495 case ConversionSpecifier::Kind::fArg:
496 FormatSpec.push_back(c: 'f');
497 break;
498 case ConversionSpecifier::Kind::FArg:
499 FormatSpec.push_back(c: 'F');
500 break;
501 case ConversionSpecifier::Kind::gArg:
502 FormatSpec.push_back(c: 'g');
503 break;
504 case ConversionSpecifier::Kind::GArg:
505 FormatSpec.push_back(c: 'G');
506 break;
507 default:
508 // Something we don't understand
509 return conversionNotPossible(Reason: (Twine("argument ") +
510 Twine(FS.getArgIndex() + ArgsOffset) +
511 " has an unsupported format specifier")
512 .str());
513 }
514
515 return true;
516}
517
518/// Append the standard format string equivalent of the passed PrintfSpecifier
519/// to StandardFormatString and store any argument fixes for later application.
520/// @returns true on success, false on failure
521bool FormatStringConverter::convertArgument(const PrintfSpecifier &FS,
522 const Expr *Arg,
523 std::string &StandardFormatString) {
524 // The specifier must have an associated argument
525 assert(FS.consumesDataArgument());
526
527 StandardFormatString.push_back(c: '{');
528
529 if (FS.usesPositionalArg()) {
530 // std::format argument identifiers are zero-based, whereas printf ones
531 // are one based.
532 assert(FS.getPositionalArgIndex() > 0U);
533 StandardFormatString.append(str: llvm::utostr(X: FS.getPositionalArgIndex() - 1));
534 }
535
536 // std::format format argument parts to potentially emit:
537 // [[fill]align][sign]["#"]["0"][width]["."precision][type]
538 std::string FormatSpec;
539
540 // printf doesn't support specifying the fill character - it's always a
541 // space, so we never need to generate one.
542
543 emitAlignment(FS, FormatSpec);
544 emitSign(FS, FormatSpec);
545 emitAlternativeForm(FS, FormatSpec);
546
547 if (FS.hasLeadingZeros())
548 FormatSpec.push_back(c: '0');
549
550 emitFieldWidth(FS, FormatSpec);
551 emitPrecision(FS, FormatSpec);
552 maybeRotateArguments(FS);
553
554 if (!emitType(FS, Arg, FormatSpec))
555 return false;
556
557 if (!FormatSpec.empty()) {
558 StandardFormatString.push_back(c: ':');
559 StandardFormatString.append(str: FormatSpec);
560 }
561
562 StandardFormatString.push_back(c: '}');
563 return true;
564}
565
566/// Called for each format specifier by ParsePrintfString.
567bool FormatStringConverter::HandlePrintfSpecifier(const PrintfSpecifier &FS,
568 const char *StartSpecifier,
569 unsigned SpecifierLen,
570 const TargetInfo &Target) {
571
572 const size_t StartSpecifierPos = StartSpecifier - PrintfFormatString.data();
573 assert(StartSpecifierPos + SpecifierLen <= PrintfFormatString.size());
574
575 // Everything before the specifier needs copying verbatim
576 assert(StartSpecifierPos >= PrintfFormatStringPos);
577
578 appendFormatText(Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
579 StartSpecifierPos - PrintfFormatStringPos));
580
581 const ConversionSpecifier::Kind ArgKind =
582 FS.getConversionSpecifier().getKind();
583
584 // Skip over specifier
585 PrintfFormatStringPos = StartSpecifierPos + SpecifierLen;
586 assert(PrintfFormatStringPos <= PrintfFormatString.size());
587
588 FormatStringNeededRewriting = true;
589
590 if (ArgKind == ConversionSpecifier::Kind::nArg) {
591 // std::print doesn't do the equivalent of %n
592 return conversionNotPossible(Reason: "'%n' is not supported in format string");
593 }
594
595 if (ArgKind == ConversionSpecifier::Kind::PrintErrno) {
596 // std::print doesn't support %m. In theory we could insert a
597 // strerror(errno) parameter (assuming that libc has a thread-safe
598 // implementation, which glibc does), but that would require keeping track
599 // of the input and output parameter indices for position arguments too.
600 return conversionNotPossible(Reason: "'%m' is not supported in format string");
601 }
602
603 if (ArgKind == ConversionSpecifier::PercentArg) {
604 StandardFormatString.push_back(c: '%');
605 return true;
606 }
607
608 const unsigned ArgIndex = FS.getArgIndex() + ArgsOffset;
609 if (ArgIndex >= NumArgs) {
610 // Argument index out of range. Give up.
611 return conversionNotPossible(
612 Reason: (Twine("argument index ") + Twine(ArgIndex) + " is out of range")
613 .str());
614 }
615
616 return convertArgument(FS, Arg: Args[ArgIndex]->IgnoreImplicitAsWritten(),
617 StandardFormatString);
618}
619
620/// Called at the very end just before applying fixes to capture the last part
621/// of the format string.
622void FormatStringConverter::finalizeFormatText() {
623 appendFormatText(
624 Text: StringRef(PrintfFormatString.begin() + PrintfFormatStringPos,
625 PrintfFormatString.size() - PrintfFormatStringPos));
626 PrintfFormatStringPos = PrintfFormatString.size();
627
628 // It's clearer to convert printf("Hello\r\n"); to std::print("Hello\r\n")
629 // than to std::println("Hello\r");
630 if (StringRef(StandardFormatString).ends_with(Suffix: "\\n") &&
631 !StringRef(StandardFormatString).ends_with(Suffix: "\\\\n") &&
632 !StringRef(StandardFormatString).ends_with(Suffix: "\\r\\n")) {
633 UsePrintNewlineFunction = true;
634 FormatStringNeededRewriting = true;
635 StandardFormatString.erase(first: StandardFormatString.end() - 2,
636 last: StandardFormatString.end());
637 }
638
639 StandardFormatString.push_back(c: '\"');
640}
641
642/// Append literal parts of the format text, reinstating escapes as required.
643void FormatStringConverter::appendFormatText(const StringRef Text) {
644 for (const char Ch : Text) {
645 if (Ch == '\a')
646 StandardFormatString += "\\a";
647 else if (Ch == '\b')
648 StandardFormatString += "\\b";
649 else if (Ch == '\f')
650 StandardFormatString += "\\f";
651 else if (Ch == '\n')
652 StandardFormatString += "\\n";
653 else if (Ch == '\r')
654 StandardFormatString += "\\r";
655 else if (Ch == '\t')
656 StandardFormatString += "\\t";
657 else if (Ch == '\v')
658 StandardFormatString += "\\v";
659 else if (Ch == '\"')
660 StandardFormatString += "\\\"";
661 else if (Ch == '\\')
662 StandardFormatString += "\\\\";
663 else if (Ch == '{') {
664 StandardFormatString += "{{";
665 FormatStringNeededRewriting = true;
666 } else if (Ch == '}') {
667 StandardFormatString += "}}";
668 FormatStringNeededRewriting = true;
669 } else if (Ch < 32) {
670 StandardFormatString += "\\x";
671 StandardFormatString += llvm::hexdigit(X: Ch >> 4, LowerCase: true);
672 StandardFormatString += llvm::hexdigit(X: Ch & 0xf, LowerCase: true);
673 } else
674 StandardFormatString += Ch;
675 }
676}
677
678static std::string withoutCStrReplacement(const BoundNodes &CStrRemovalMatch,
679 ASTContext &Context) {
680 const auto *Arg = CStrRemovalMatch.getNodeAs<Expr>(ID: "arg");
681 const auto *Member = CStrRemovalMatch.getNodeAs<MemberExpr>(ID: "member");
682 const bool Arrow = Member->isArrow();
683 return Arrow ? utils::fixit::formatDereference(ExprNode: *Arg, Context)
684 : tooling::fixit::getText(Node: *Arg, Context).str();
685}
686
687/// Called by the check when it is ready to apply the fixes.
688void FormatStringConverter::applyFixes(DiagnosticBuilder &Diag,
689 SourceManager &SM) {
690 if (FormatStringNeededRewriting) {
691 Diag << FixItHint::CreateReplacement(
692 RemoveRange: CharSourceRange::getTokenRange(B: FormatExpr->getBeginLoc(),
693 E: FormatExpr->getEndLoc()),
694 Code: StandardFormatString);
695 }
696
697 // ArgCount is one less than the number of arguments to be rotated.
698 for (auto [ValueArgIndex, ArgCount] : ArgRotates) {
699 assert(ValueArgIndex < NumArgs);
700 assert(ValueArgIndex > ArgCount);
701
702 // First move the value argument to the right place. But if there's a
703 // pending c_str() removal then we must do that at the same time.
704 if (const auto CStrRemovalMatch =
705 std::find_if(first: ArgCStrRemovals.cbegin(), last: ArgCStrRemovals.cend(),
706 pred: [ArgStartPos = Args[ValueArgIndex]->getBeginLoc()](
707 const BoundNodes &Match) {
708 // This c_str() removal corresponds to the argument
709 // being moved if they start at the same location.
710 const Expr *CStrArg = Match.getNodeAs<Expr>(ID: "arg");
711 return ArgStartPos == CStrArg->getBeginLoc();
712 });
713 CStrRemovalMatch != ArgCStrRemovals.end()) {
714 const std::string ArgText =
715 withoutCStrReplacement(CStrRemovalMatch: *CStrRemovalMatch, Context&: *Context);
716 assert(!ArgText.empty());
717
718 Diag << FixItHint::CreateReplacement(
719 Args[ValueArgIndex - ArgCount]->getSourceRange(), ArgText);
720
721 // That c_str() removal is now dealt with, so we don't need to do it again
722 ArgCStrRemovals.erase(position: CStrRemovalMatch);
723 } else
724 Diag << tooling::fixit::createReplacement(Destination: *Args[ValueArgIndex - ArgCount],
725 Source: *Args[ValueArgIndex], Context: *Context);
726
727 // Now shift down the field width and precision (if either are present) to
728 // accommodate it.
729 for (size_t Offset = 0; Offset < ArgCount; ++Offset)
730 Diag << tooling::fixit::createReplacement(
731 Destination: *Args[ValueArgIndex - Offset], Source: *Args[ValueArgIndex - Offset - 1],
732 Context: *Context);
733
734 // Now we need to modify the ArgFix index too so that we fix the right
735 // argument. We don't need to care about the width and precision indices
736 // since they never need fixing.
737 for (auto &ArgFix : ArgFixes) {
738 if (ArgFix.ArgIndex == ValueArgIndex)
739 ArgFix.ArgIndex = ValueArgIndex - ArgCount;
740 }
741 }
742
743 for (const auto &[ArgIndex, Replacement] : ArgFixes) {
744 SourceLocation AfterOtherSide =
745 Lexer::findNextToken(Loc: Args[ArgIndex]->getEndLoc(), SM, LangOpts)
746 ->getLocation();
747
748 Diag << FixItHint::CreateInsertion(InsertionLoc: Args[ArgIndex]->getBeginLoc(),
749 Code: Replacement, BeforePreviousInsertions: true)
750 << FixItHint::CreateInsertion(InsertionLoc: AfterOtherSide, Code: ")", BeforePreviousInsertions: true);
751 }
752
753 for (const auto &Match : ArgCStrRemovals) {
754 const auto *Call = Match.getNodeAs<CallExpr>(ID: "call");
755 const std::string ArgText = withoutCStrReplacement(CStrRemovalMatch: Match, Context&: *Context);
756 if (!ArgText.empty())
757 Diag << FixItHint::CreateReplacement(Call->getSourceRange(), ArgText);
758 }
759}
760} // namespace clang::tidy::utils
761

source code of clang-tools-extra/clang-tidy/utils/FormatStringConverter.cpp