1 | // FormatString.cpp - Common stuff for handling printf/scanf formats -*- C++ -*- |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Shared details for processing format strings of printf and scanf |
10 | // (and friends). |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "FormatStringParsing.h" |
15 | #include "clang/Basic/LangOptions.h" |
16 | #include "clang/Basic/TargetInfo.h" |
17 | #include "llvm/Support/ConvertUTF.h" |
18 | #include <optional> |
19 | |
20 | using clang::analyze_format_string::ArgType; |
21 | using clang::analyze_format_string::FormatStringHandler; |
22 | using clang::analyze_format_string::FormatSpecifier; |
23 | using clang::analyze_format_string::LengthModifier; |
24 | using clang::analyze_format_string::OptionalAmount; |
25 | using clang::analyze_format_string::ConversionSpecifier; |
26 | using namespace clang; |
27 | |
28 | // Key function to FormatStringHandler. |
29 | FormatStringHandler::~FormatStringHandler() {} |
30 | |
31 | //===----------------------------------------------------------------------===// |
32 | // Functions for parsing format strings components in both printf and |
33 | // scanf format strings. |
34 | //===----------------------------------------------------------------------===// |
35 | |
36 | OptionalAmount |
37 | clang::analyze_format_string::ParseAmount(const char *&Beg, const char *E) { |
38 | const char *I = Beg; |
39 | UpdateOnReturn <const char*> UpdateBeg(Beg, I); |
40 | |
41 | unsigned accumulator = 0; |
42 | bool hasDigits = false; |
43 | |
44 | for ( ; I != E; ++I) { |
45 | char c = *I; |
46 | if (c >= '0' && c <= '9') { |
47 | hasDigits = true; |
48 | accumulator = (accumulator * 10) + (c - '0'); |
49 | continue; |
50 | } |
51 | |
52 | if (hasDigits) |
53 | return OptionalAmount(OptionalAmount::Constant, accumulator, Beg, I - Beg, |
54 | false); |
55 | |
56 | break; |
57 | } |
58 | |
59 | return OptionalAmount(); |
60 | } |
61 | |
62 | OptionalAmount |
63 | clang::analyze_format_string::ParseNonPositionAmount(const char *&Beg, |
64 | const char *E, |
65 | unsigned &argIndex) { |
66 | if (*Beg == '*') { |
67 | ++Beg; |
68 | return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg, 0, false); |
69 | } |
70 | |
71 | return ParseAmount(Beg, E); |
72 | } |
73 | |
74 | OptionalAmount |
75 | clang::analyze_format_string::ParsePositionAmount(FormatStringHandler &H, |
76 | const char *Start, |
77 | const char *&Beg, |
78 | const char *E, |
79 | PositionContext p) { |
80 | if (*Beg == '*') { |
81 | const char *I = Beg + 1; |
82 | const OptionalAmount &Amt = ParseAmount(Beg&: I, E); |
83 | |
84 | if (Amt.getHowSpecified() == OptionalAmount::NotSpecified) { |
85 | H.HandleInvalidPosition(startPos: Beg, posLen: I - Beg, p); |
86 | return OptionalAmount(false); |
87 | } |
88 | |
89 | if (I == E) { |
90 | // No more characters left? |
91 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
92 | return OptionalAmount(false); |
93 | } |
94 | |
95 | assert(Amt.getHowSpecified() == OptionalAmount::Constant); |
96 | |
97 | if (*I == '$') { |
98 | // Handle positional arguments |
99 | |
100 | // Special case: '*0$', since this is an easy mistake. |
101 | if (Amt.getConstantAmount() == 0) { |
102 | H.HandleZeroPosition(startPos: Beg, posLen: I - Beg + 1); |
103 | return OptionalAmount(false); |
104 | } |
105 | |
106 | const char *Tmp = Beg; |
107 | Beg = ++I; |
108 | |
109 | return OptionalAmount(OptionalAmount::Arg, Amt.getConstantAmount() - 1, |
110 | Tmp, 0, true); |
111 | } |
112 | |
113 | H.HandleInvalidPosition(startPos: Beg, posLen: I - Beg, p); |
114 | return OptionalAmount(false); |
115 | } |
116 | |
117 | return ParseAmount(Beg, E); |
118 | } |
119 | |
120 | |
121 | bool |
122 | clang::analyze_format_string::ParseFieldWidth(FormatStringHandler &H, |
123 | FormatSpecifier &CS, |
124 | const char *Start, |
125 | const char *&Beg, const char *E, |
126 | unsigned *argIndex) { |
127 | // FIXME: Support negative field widths. |
128 | if (argIndex) { |
129 | CS.setFieldWidth(ParseNonPositionAmount(Beg, E, argIndex&: *argIndex)); |
130 | } |
131 | else { |
132 | const OptionalAmount Amt = |
133 | ParsePositionAmount(H, Start, Beg, E, |
134 | p: analyze_format_string::FieldWidthPos); |
135 | |
136 | if (Amt.isInvalid()) |
137 | return true; |
138 | CS.setFieldWidth(Amt); |
139 | } |
140 | return false; |
141 | } |
142 | |
143 | bool |
144 | clang::analyze_format_string::ParseArgPosition(FormatStringHandler &H, |
145 | FormatSpecifier &FS, |
146 | const char *Start, |
147 | const char *&Beg, |
148 | const char *E) { |
149 | const char *I = Beg; |
150 | |
151 | const OptionalAmount &Amt = ParseAmount(Beg&: I, E); |
152 | |
153 | if (I == E) { |
154 | // No more characters left? |
155 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
156 | return true; |
157 | } |
158 | |
159 | if (Amt.getHowSpecified() == OptionalAmount::Constant && *(I++) == '$') { |
160 | // Warn that positional arguments are non-standard. |
161 | H.HandlePosition(startPos: Start, posLen: I - Start); |
162 | |
163 | // Special case: '%0$', since this is an easy mistake. |
164 | if (Amt.getConstantAmount() == 0) { |
165 | H.HandleZeroPosition(startPos: Start, posLen: I - Start); |
166 | return true; |
167 | } |
168 | |
169 | FS.setArgIndex(Amt.getConstantAmount() - 1); |
170 | FS.setUsesPositionalArg(); |
171 | // Update the caller's pointer if we decided to consume |
172 | // these characters. |
173 | Beg = I; |
174 | return false; |
175 | } |
176 | |
177 | return false; |
178 | } |
179 | |
180 | bool |
181 | clang::analyze_format_string::ParseVectorModifier(FormatStringHandler &H, |
182 | FormatSpecifier &FS, |
183 | const char *&I, |
184 | const char *E, |
185 | const LangOptions &LO) { |
186 | if (!LO.OpenCL) |
187 | return false; |
188 | |
189 | const char *Start = I; |
190 | if (*I == 'v') { |
191 | ++I; |
192 | |
193 | if (I == E) { |
194 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
195 | return true; |
196 | } |
197 | |
198 | OptionalAmount NumElts = ParseAmount(Beg&: I, E); |
199 | if (NumElts.getHowSpecified() != OptionalAmount::Constant) { |
200 | H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start); |
201 | return true; |
202 | } |
203 | |
204 | FS.setVectorNumElts(NumElts); |
205 | } |
206 | |
207 | return false; |
208 | } |
209 | |
210 | bool |
211 | clang::analyze_format_string::ParseLengthModifier(FormatSpecifier &FS, |
212 | const char *&I, |
213 | const char *E, |
214 | const LangOptions &LO, |
215 | bool IsScanf) { |
216 | LengthModifier::Kind lmKind = LengthModifier::None; |
217 | const char *lmPosition = I; |
218 | switch (*I) { |
219 | default: |
220 | return false; |
221 | case 'h': |
222 | ++I; |
223 | if (I != E && *I == 'h') { |
224 | ++I; |
225 | lmKind = LengthModifier::AsChar; |
226 | } else if (I != E && *I == 'l' && LO.OpenCL) { |
227 | ++I; |
228 | lmKind = LengthModifier::AsShortLong; |
229 | } else { |
230 | lmKind = LengthModifier::AsShort; |
231 | } |
232 | break; |
233 | case 'l': |
234 | ++I; |
235 | if (I != E && *I == 'l') { |
236 | ++I; |
237 | lmKind = LengthModifier::AsLongLong; |
238 | } else { |
239 | lmKind = LengthModifier::AsLong; |
240 | } |
241 | break; |
242 | case 'j': lmKind = LengthModifier::AsIntMax; ++I; break; |
243 | case 'z': lmKind = LengthModifier::AsSizeT; ++I; break; |
244 | case 't': lmKind = LengthModifier::AsPtrDiff; ++I; break; |
245 | case 'L': lmKind = LengthModifier::AsLongDouble; ++I; break; |
246 | case 'q': lmKind = LengthModifier::AsQuad; ++I; break; |
247 | case 'a': |
248 | if (IsScanf && !LO.C99 && !LO.CPlusPlus11) { |
249 | // For scanf in C90, look at the next character to see if this should |
250 | // be parsed as the GNU extension 'a' length modifier. If not, this |
251 | // will be parsed as a conversion specifier. |
252 | ++I; |
253 | if (I != E && (*I == 's' || *I == 'S' || *I == '[')) { |
254 | lmKind = LengthModifier::AsAllocate; |
255 | break; |
256 | } |
257 | --I; |
258 | } |
259 | return false; |
260 | case 'm': |
261 | if (IsScanf) { |
262 | lmKind = LengthModifier::AsMAllocate; |
263 | ++I; |
264 | break; |
265 | } |
266 | return false; |
267 | // printf: AsInt64, AsInt32, AsInt3264 |
268 | // scanf: AsInt64 |
269 | case 'I': |
270 | if (I + 1 != E && I + 2 != E) { |
271 | if (I[1] == '6' && I[2] == '4') { |
272 | I += 3; |
273 | lmKind = LengthModifier::AsInt64; |
274 | break; |
275 | } |
276 | if (IsScanf) |
277 | return false; |
278 | |
279 | if (I[1] == '3' && I[2] == '2') { |
280 | I += 3; |
281 | lmKind = LengthModifier::AsInt32; |
282 | break; |
283 | } |
284 | } |
285 | ++I; |
286 | lmKind = LengthModifier::AsInt3264; |
287 | break; |
288 | case 'w': |
289 | lmKind = LengthModifier::AsWide; ++I; break; |
290 | } |
291 | LengthModifier lm(lmPosition, lmKind); |
292 | FS.setLengthModifier(lm); |
293 | return true; |
294 | } |
295 | |
296 | bool clang::analyze_format_string::ParseUTF8InvalidSpecifier( |
297 | const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len) { |
298 | if (SpecifierBegin + 1 >= FmtStrEnd) |
299 | return false; |
300 | |
301 | const llvm::UTF8 *SB = |
302 | reinterpret_cast<const llvm::UTF8 *>(SpecifierBegin + 1); |
303 | const llvm::UTF8 *SE = reinterpret_cast<const llvm::UTF8 *>(FmtStrEnd); |
304 | const char FirstByte = *SB; |
305 | |
306 | // If the invalid specifier is a multibyte UTF-8 string, return the |
307 | // total length accordingly so that the conversion specifier can be |
308 | // properly updated to reflect a complete UTF-8 specifier. |
309 | unsigned NumBytes = llvm::getNumBytesForUTF8(firstByte: FirstByte); |
310 | if (NumBytes == 1) |
311 | return false; |
312 | if (SB + NumBytes > SE) |
313 | return false; |
314 | |
315 | Len = NumBytes + 1; |
316 | return true; |
317 | } |
318 | |
319 | //===----------------------------------------------------------------------===// |
320 | // Methods on ArgType. |
321 | //===----------------------------------------------------------------------===// |
322 | |
323 | clang::analyze_format_string::ArgType::MatchKind |
324 | ArgType::matchesType(ASTContext &C, QualType argTy) const { |
325 | // When using the format attribute in C++, you can receive a function or an |
326 | // array that will necessarily decay to a pointer when passed to the final |
327 | // format consumer. Apply decay before type comparison. |
328 | if (argTy->canDecayToPointerType()) |
329 | argTy = C.getDecayedType(T: argTy); |
330 | |
331 | if (Ptr) { |
332 | // It has to be a pointer. |
333 | const PointerType *PT = argTy->getAs<PointerType>(); |
334 | if (!PT) |
335 | return NoMatch; |
336 | |
337 | // We cannot write through a const qualified pointer. |
338 | if (PT->getPointeeType().isConstQualified()) |
339 | return NoMatch; |
340 | |
341 | argTy = PT->getPointeeType(); |
342 | } |
343 | |
344 | switch (K) { |
345 | case InvalidTy: |
346 | llvm_unreachable("ArgType must be valid" ); |
347 | |
348 | case UnknownTy: |
349 | return Match; |
350 | |
351 | case AnyCharTy: { |
352 | if (const auto *ETy = argTy->getAs<EnumType>()) { |
353 | // If the enum is incomplete we know nothing about the underlying type. |
354 | // Assume that it's 'int'. Do not use the underlying type for a scoped |
355 | // enumeration. |
356 | if (!ETy->getDecl()->isComplete()) |
357 | return NoMatch; |
358 | if (ETy->isUnscopedEnumerationType()) |
359 | argTy = ETy->getDecl()->getIntegerType(); |
360 | } |
361 | |
362 | if (const auto *BT = argTy->getAs<BuiltinType>()) { |
363 | // The types are perfectly matched? |
364 | switch (BT->getKind()) { |
365 | default: |
366 | break; |
367 | case BuiltinType::Char_S: |
368 | case BuiltinType::SChar: |
369 | case BuiltinType::UChar: |
370 | case BuiltinType::Char_U: |
371 | return Match; |
372 | case BuiltinType::Bool: |
373 | if (!Ptr) |
374 | return Match; |
375 | break; |
376 | } |
377 | // "Partially matched" because of promotions? |
378 | if (!Ptr) { |
379 | switch (BT->getKind()) { |
380 | default: |
381 | break; |
382 | case BuiltinType::Int: |
383 | case BuiltinType::UInt: |
384 | return MatchPromotion; |
385 | case BuiltinType::Short: |
386 | case BuiltinType::UShort: |
387 | case BuiltinType::WChar_S: |
388 | case BuiltinType::WChar_U: |
389 | return NoMatchPromotionTypeConfusion; |
390 | } |
391 | } |
392 | } |
393 | return NoMatch; |
394 | } |
395 | |
396 | case SpecificTy: { |
397 | if (const EnumType *ETy = argTy->getAs<EnumType>()) { |
398 | // If the enum is incomplete we know nothing about the underlying type. |
399 | // Assume that it's 'int'. Do not use the underlying type for a scoped |
400 | // enumeration as that needs an exact match. |
401 | if (!ETy->getDecl()->isComplete()) |
402 | argTy = C.IntTy; |
403 | else if (ETy->isUnscopedEnumerationType()) |
404 | argTy = ETy->getDecl()->getIntegerType(); |
405 | } |
406 | argTy = C.getCanonicalType(T: argTy).getUnqualifiedType(); |
407 | |
408 | if (T == argTy) |
409 | return Match; |
410 | if (const auto *BT = argTy->getAs<BuiltinType>()) { |
411 | // Check if the only difference between them is signed vs unsigned |
412 | // if true, we consider they are compatible. |
413 | switch (BT->getKind()) { |
414 | default: |
415 | break; |
416 | case BuiltinType::Bool: |
417 | if (Ptr && (T == C.UnsignedCharTy || T == C.SignedCharTy)) |
418 | return NoMatch; |
419 | [[fallthrough]]; |
420 | case BuiltinType::Char_S: |
421 | case BuiltinType::SChar: |
422 | case BuiltinType::Char_U: |
423 | case BuiltinType::UChar: |
424 | if (T == C.UnsignedShortTy || T == C.ShortTy) |
425 | return NoMatchTypeConfusion; |
426 | if (T == C.UnsignedCharTy || T == C.SignedCharTy) |
427 | return Match; |
428 | break; |
429 | case BuiltinType::Short: |
430 | if (T == C.UnsignedShortTy) |
431 | return Match; |
432 | break; |
433 | case BuiltinType::UShort: |
434 | if (T == C.ShortTy) |
435 | return Match; |
436 | break; |
437 | case BuiltinType::Int: |
438 | if (T == C.UnsignedIntTy) |
439 | return Match; |
440 | break; |
441 | case BuiltinType::UInt: |
442 | if (T == C.IntTy) |
443 | return Match; |
444 | break; |
445 | case BuiltinType::Long: |
446 | if (T == C.UnsignedLongTy) |
447 | return Match; |
448 | break; |
449 | case BuiltinType::ULong: |
450 | if (T == C.LongTy) |
451 | return Match; |
452 | break; |
453 | case BuiltinType::LongLong: |
454 | if (T == C.UnsignedLongLongTy) |
455 | return Match; |
456 | break; |
457 | case BuiltinType::ULongLong: |
458 | if (T == C.LongLongTy) |
459 | return Match; |
460 | break; |
461 | } |
462 | // "Partially matched" because of promotions? |
463 | if (!Ptr) { |
464 | switch (BT->getKind()) { |
465 | default: |
466 | break; |
467 | case BuiltinType::Bool: |
468 | if (T == C.IntTy || T == C.UnsignedIntTy) |
469 | return MatchPromotion; |
470 | break; |
471 | case BuiltinType::Int: |
472 | case BuiltinType::UInt: |
473 | if (T == C.SignedCharTy || T == C.UnsignedCharTy || |
474 | T == C.ShortTy || T == C.UnsignedShortTy || T == C.WCharTy || |
475 | T == C.WideCharTy) |
476 | return MatchPromotion; |
477 | break; |
478 | case BuiltinType::Char_U: |
479 | if (T == C.UnsignedIntTy) |
480 | return MatchPromotion; |
481 | if (T == C.UnsignedShortTy) |
482 | return NoMatchPromotionTypeConfusion; |
483 | break; |
484 | case BuiltinType::Char_S: |
485 | if (T == C.IntTy) |
486 | return MatchPromotion; |
487 | if (T == C.ShortTy) |
488 | return NoMatchPromotionTypeConfusion; |
489 | break; |
490 | case BuiltinType::Half: |
491 | case BuiltinType::Float: |
492 | if (T == C.DoubleTy) |
493 | return MatchPromotion; |
494 | break; |
495 | case BuiltinType::Short: |
496 | case BuiltinType::UShort: |
497 | if (T == C.SignedCharTy || T == C.UnsignedCharTy) |
498 | return NoMatchPromotionTypeConfusion; |
499 | break; |
500 | case BuiltinType::WChar_U: |
501 | case BuiltinType::WChar_S: |
502 | if (T != C.WCharTy && T != C.WideCharTy) |
503 | return NoMatchPromotionTypeConfusion; |
504 | } |
505 | } |
506 | } |
507 | return NoMatch; |
508 | } |
509 | |
510 | case CStrTy: { |
511 | const PointerType *PT = argTy->getAs<PointerType>(); |
512 | if (!PT) |
513 | return NoMatch; |
514 | QualType pointeeTy = PT->getPointeeType(); |
515 | if (const BuiltinType *BT = pointeeTy->getAs<BuiltinType>()) |
516 | switch (BT->getKind()) { |
517 | case BuiltinType::Char_U: |
518 | case BuiltinType::UChar: |
519 | case BuiltinType::Char_S: |
520 | case BuiltinType::SChar: |
521 | return Match; |
522 | default: |
523 | break; |
524 | } |
525 | |
526 | return NoMatch; |
527 | } |
528 | |
529 | case WCStrTy: { |
530 | const PointerType *PT = argTy->getAs<PointerType>(); |
531 | if (!PT) |
532 | return NoMatch; |
533 | QualType pointeeTy = |
534 | C.getCanonicalType(T: PT->getPointeeType()).getUnqualifiedType(); |
535 | return pointeeTy == C.getWideCharType() ? Match : NoMatch; |
536 | } |
537 | |
538 | case WIntTy: { |
539 | QualType WInt = C.getCanonicalType(T: C.getWIntType()).getUnqualifiedType(); |
540 | |
541 | if (C.getCanonicalType(T: argTy).getUnqualifiedType() == WInt) |
542 | return Match; |
543 | |
544 | QualType PromoArg = C.isPromotableIntegerType(T: argTy) |
545 | ? C.getPromotedIntegerType(PromotableType: argTy) |
546 | : argTy; |
547 | PromoArg = C.getCanonicalType(T: PromoArg).getUnqualifiedType(); |
548 | |
549 | // If the promoted argument is the corresponding signed type of the |
550 | // wint_t type, then it should match. |
551 | if (PromoArg->hasSignedIntegerRepresentation() && |
552 | C.getCorrespondingUnsignedType(T: PromoArg) == WInt) |
553 | return Match; |
554 | |
555 | return WInt == PromoArg ? Match : NoMatch; |
556 | } |
557 | |
558 | case CPointerTy: |
559 | if (argTy->isVoidPointerType()) { |
560 | return Match; |
561 | } if (argTy->isPointerType() || argTy->isObjCObjectPointerType() || |
562 | argTy->isBlockPointerType() || argTy->isNullPtrType()) { |
563 | return NoMatchPedantic; |
564 | } else { |
565 | return NoMatch; |
566 | } |
567 | |
568 | case ObjCPointerTy: { |
569 | if (argTy->getAs<ObjCObjectPointerType>() || |
570 | argTy->getAs<BlockPointerType>()) |
571 | return Match; |
572 | |
573 | // Handle implicit toll-free bridging. |
574 | if (const PointerType *PT = argTy->getAs<PointerType>()) { |
575 | // Things such as CFTypeRef are really just opaque pointers |
576 | // to C structs representing CF types that can often be bridged |
577 | // to Objective-C objects. Since the compiler doesn't know which |
578 | // structs can be toll-free bridged, we just accept them all. |
579 | QualType pointee = PT->getPointeeType(); |
580 | if (pointee->getAsStructureType() || pointee->isVoidType()) |
581 | return Match; |
582 | } |
583 | return NoMatch; |
584 | } |
585 | } |
586 | |
587 | llvm_unreachable("Invalid ArgType Kind!" ); |
588 | } |
589 | |
590 | ArgType ArgType::makeVectorType(ASTContext &C, unsigned NumElts) const { |
591 | // Check for valid vector element types. |
592 | if (T.isNull()) |
593 | return ArgType::Invalid(); |
594 | |
595 | QualType Vec = C.getExtVectorType(T, NumElts); |
596 | return ArgType(Vec, Name); |
597 | } |
598 | |
599 | QualType ArgType::getRepresentativeType(ASTContext &C) const { |
600 | QualType Res; |
601 | switch (K) { |
602 | case InvalidTy: |
603 | llvm_unreachable("No representative type for Invalid ArgType" ); |
604 | case UnknownTy: |
605 | llvm_unreachable("No representative type for Unknown ArgType" ); |
606 | case AnyCharTy: |
607 | Res = C.CharTy; |
608 | break; |
609 | case SpecificTy: |
610 | Res = T; |
611 | break; |
612 | case CStrTy: |
613 | Res = C.getPointerType(C.CharTy); |
614 | break; |
615 | case WCStrTy: |
616 | Res = C.getPointerType(T: C.getWideCharType()); |
617 | break; |
618 | case ObjCPointerTy: |
619 | Res = C.ObjCBuiltinIdTy; |
620 | break; |
621 | case CPointerTy: |
622 | Res = C.VoidPtrTy; |
623 | break; |
624 | case WIntTy: { |
625 | Res = C.getWIntType(); |
626 | break; |
627 | } |
628 | } |
629 | |
630 | if (Ptr) |
631 | Res = C.getPointerType(T: Res); |
632 | return Res; |
633 | } |
634 | |
635 | std::string ArgType::getRepresentativeTypeName(ASTContext &C) const { |
636 | std::string S = getRepresentativeType(C).getAsString(Policy: C.getPrintingPolicy()); |
637 | |
638 | std::string Alias; |
639 | if (Name) { |
640 | // Use a specific name for this type, e.g. "size_t". |
641 | Alias = Name; |
642 | if (Ptr) { |
643 | // If ArgType is actually a pointer to T, append an asterisk. |
644 | Alias += (Alias[Alias.size()-1] == '*') ? "*" : " *" ; |
645 | } |
646 | // If Alias is the same as the underlying type, e.g. wchar_t, then drop it. |
647 | if (S == Alias) |
648 | Alias.clear(); |
649 | } |
650 | |
651 | if (!Alias.empty()) |
652 | return std::string("'" ) + Alias + "' (aka '" + S + "')" ; |
653 | return std::string("'" ) + S + "'" ; |
654 | } |
655 | |
656 | |
657 | //===----------------------------------------------------------------------===// |
658 | // Methods on OptionalAmount. |
659 | //===----------------------------------------------------------------------===// |
660 | |
661 | ArgType |
662 | analyze_format_string::OptionalAmount::getArgType(ASTContext &Ctx) const { |
663 | return Ctx.IntTy; |
664 | } |
665 | |
666 | //===----------------------------------------------------------------------===// |
667 | // Methods on LengthModifier. |
668 | //===----------------------------------------------------------------------===// |
669 | |
670 | const char * |
671 | analyze_format_string::LengthModifier::toString() const { |
672 | switch (kind) { |
673 | case AsChar: |
674 | return "hh" ; |
675 | case AsShort: |
676 | return "h" ; |
677 | case AsShortLong: |
678 | return "hl" ; |
679 | case AsLong: // or AsWideChar |
680 | return "l" ; |
681 | case AsLongLong: |
682 | return "ll" ; |
683 | case AsQuad: |
684 | return "q" ; |
685 | case AsIntMax: |
686 | return "j" ; |
687 | case AsSizeT: |
688 | return "z" ; |
689 | case AsPtrDiff: |
690 | return "t" ; |
691 | case AsInt32: |
692 | return "I32" ; |
693 | case AsInt3264: |
694 | return "I" ; |
695 | case AsInt64: |
696 | return "I64" ; |
697 | case AsLongDouble: |
698 | return "L" ; |
699 | case AsAllocate: |
700 | return "a" ; |
701 | case AsMAllocate: |
702 | return "m" ; |
703 | case AsWide: |
704 | return "w" ; |
705 | case None: |
706 | return "" ; |
707 | } |
708 | return nullptr; |
709 | } |
710 | |
711 | //===----------------------------------------------------------------------===// |
712 | // Methods on ConversionSpecifier. |
713 | //===----------------------------------------------------------------------===// |
714 | |
715 | const char *ConversionSpecifier::toString() const { |
716 | switch (kind) { |
717 | case bArg: return "b" ; |
718 | case BArg: return "B" ; |
719 | case dArg: return "d" ; |
720 | case DArg: return "D" ; |
721 | case iArg: return "i" ; |
722 | case oArg: return "o" ; |
723 | case OArg: return "O" ; |
724 | case uArg: return "u" ; |
725 | case UArg: return "U" ; |
726 | case xArg: return "x" ; |
727 | case XArg: return "X" ; |
728 | case fArg: return "f" ; |
729 | case FArg: return "F" ; |
730 | case eArg: return "e" ; |
731 | case EArg: return "E" ; |
732 | case gArg: return "g" ; |
733 | case GArg: return "G" ; |
734 | case aArg: return "a" ; |
735 | case AArg: return "A" ; |
736 | case cArg: return "c" ; |
737 | case sArg: return "s" ; |
738 | case pArg: return "p" ; |
739 | case PArg: |
740 | return "P" ; |
741 | case nArg: return "n" ; |
742 | case PercentArg: return "%" ; |
743 | case ScanListArg: return "[" ; |
744 | case InvalidSpecifier: return nullptr; |
745 | |
746 | // POSIX unicode extensions. |
747 | case CArg: return "C" ; |
748 | case SArg: return "S" ; |
749 | |
750 | // Objective-C specific specifiers. |
751 | case ObjCObjArg: return "@" ; |
752 | |
753 | // FreeBSD kernel specific specifiers. |
754 | case FreeBSDbArg: return "b" ; |
755 | case FreeBSDDArg: return "D" ; |
756 | case FreeBSDrArg: return "r" ; |
757 | case FreeBSDyArg: return "y" ; |
758 | |
759 | // GlibC specific specifiers. |
760 | case PrintErrno: return "m" ; |
761 | |
762 | // MS specific specifiers. |
763 | case ZArg: return "Z" ; |
764 | } |
765 | return nullptr; |
766 | } |
767 | |
768 | std::optional<ConversionSpecifier> |
769 | ConversionSpecifier::getStandardSpecifier() const { |
770 | ConversionSpecifier::Kind NewKind; |
771 | |
772 | switch (getKind()) { |
773 | default: |
774 | return std::nullopt; |
775 | case DArg: |
776 | NewKind = dArg; |
777 | break; |
778 | case UArg: |
779 | NewKind = uArg; |
780 | break; |
781 | case OArg: |
782 | NewKind = oArg; |
783 | break; |
784 | } |
785 | |
786 | ConversionSpecifier FixedCS(*this); |
787 | FixedCS.setKind(NewKind); |
788 | return FixedCS; |
789 | } |
790 | |
791 | //===----------------------------------------------------------------------===// |
792 | // Methods on OptionalAmount. |
793 | //===----------------------------------------------------------------------===// |
794 | |
795 | void OptionalAmount::toString(raw_ostream &os) const { |
796 | switch (hs) { |
797 | case Invalid: |
798 | case NotSpecified: |
799 | return; |
800 | case Arg: |
801 | if (UsesDotPrefix) |
802 | os << "." ; |
803 | if (usesPositionalArg()) |
804 | os << "*" << getPositionalArgIndex() << "$" ; |
805 | else |
806 | os << "*" ; |
807 | break; |
808 | case Constant: |
809 | if (UsesDotPrefix) |
810 | os << "." ; |
811 | os << amt; |
812 | break; |
813 | } |
814 | } |
815 | |
816 | bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target, |
817 | const LangOptions &LO) const { |
818 | switch (LM.getKind()) { |
819 | case LengthModifier::None: |
820 | return true; |
821 | |
822 | // Handle most integer flags |
823 | case LengthModifier::AsShort: |
824 | // Length modifier only applies to FP vectors. |
825 | if (LO.OpenCL && CS.isDoubleArg()) |
826 | return !VectorNumElts.isInvalid(); |
827 | |
828 | if (Target.getTriple().isOSMSVCRT()) { |
829 | switch (CS.getKind()) { |
830 | case ConversionSpecifier::cArg: |
831 | case ConversionSpecifier::CArg: |
832 | case ConversionSpecifier::sArg: |
833 | case ConversionSpecifier::SArg: |
834 | case ConversionSpecifier::ZArg: |
835 | return true; |
836 | default: |
837 | break; |
838 | } |
839 | } |
840 | [[fallthrough]]; |
841 | case LengthModifier::AsChar: |
842 | case LengthModifier::AsLongLong: |
843 | case LengthModifier::AsQuad: |
844 | case LengthModifier::AsIntMax: |
845 | case LengthModifier::AsSizeT: |
846 | case LengthModifier::AsPtrDiff: |
847 | switch (CS.getKind()) { |
848 | case ConversionSpecifier::bArg: |
849 | case ConversionSpecifier::BArg: |
850 | case ConversionSpecifier::dArg: |
851 | case ConversionSpecifier::DArg: |
852 | case ConversionSpecifier::iArg: |
853 | case ConversionSpecifier::oArg: |
854 | case ConversionSpecifier::OArg: |
855 | case ConversionSpecifier::uArg: |
856 | case ConversionSpecifier::UArg: |
857 | case ConversionSpecifier::xArg: |
858 | case ConversionSpecifier::XArg: |
859 | case ConversionSpecifier::nArg: |
860 | return true; |
861 | case ConversionSpecifier::FreeBSDrArg: |
862 | case ConversionSpecifier::FreeBSDyArg: |
863 | return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS(); |
864 | default: |
865 | return false; |
866 | } |
867 | |
868 | case LengthModifier::AsShortLong: |
869 | return LO.OpenCL && !VectorNumElts.isInvalid(); |
870 | |
871 | // Handle 'l' flag |
872 | case LengthModifier::AsLong: // or AsWideChar |
873 | if (CS.isDoubleArg()) { |
874 | // Invalid for OpenCL FP scalars. |
875 | if (LO.OpenCL && VectorNumElts.isInvalid()) |
876 | return false; |
877 | return true; |
878 | } |
879 | |
880 | switch (CS.getKind()) { |
881 | case ConversionSpecifier::bArg: |
882 | case ConversionSpecifier::BArg: |
883 | case ConversionSpecifier::dArg: |
884 | case ConversionSpecifier::DArg: |
885 | case ConversionSpecifier::iArg: |
886 | case ConversionSpecifier::oArg: |
887 | case ConversionSpecifier::OArg: |
888 | case ConversionSpecifier::uArg: |
889 | case ConversionSpecifier::UArg: |
890 | case ConversionSpecifier::xArg: |
891 | case ConversionSpecifier::XArg: |
892 | case ConversionSpecifier::nArg: |
893 | case ConversionSpecifier::cArg: |
894 | case ConversionSpecifier::sArg: |
895 | case ConversionSpecifier::ScanListArg: |
896 | case ConversionSpecifier::ZArg: |
897 | return true; |
898 | case ConversionSpecifier::FreeBSDrArg: |
899 | case ConversionSpecifier::FreeBSDyArg: |
900 | return Target.getTriple().isOSFreeBSD() || Target.getTriple().isPS(); |
901 | default: |
902 | return false; |
903 | } |
904 | |
905 | case LengthModifier::AsLongDouble: |
906 | switch (CS.getKind()) { |
907 | case ConversionSpecifier::aArg: |
908 | case ConversionSpecifier::AArg: |
909 | case ConversionSpecifier::fArg: |
910 | case ConversionSpecifier::FArg: |
911 | case ConversionSpecifier::eArg: |
912 | case ConversionSpecifier::EArg: |
913 | case ConversionSpecifier::gArg: |
914 | case ConversionSpecifier::GArg: |
915 | return true; |
916 | // GNU libc extension. |
917 | case ConversionSpecifier::dArg: |
918 | case ConversionSpecifier::iArg: |
919 | case ConversionSpecifier::oArg: |
920 | case ConversionSpecifier::uArg: |
921 | case ConversionSpecifier::xArg: |
922 | case ConversionSpecifier::XArg: |
923 | return !Target.getTriple().isOSDarwin() && |
924 | !Target.getTriple().isOSWindows(); |
925 | default: |
926 | return false; |
927 | } |
928 | |
929 | case LengthModifier::AsAllocate: |
930 | switch (CS.getKind()) { |
931 | case ConversionSpecifier::sArg: |
932 | case ConversionSpecifier::SArg: |
933 | case ConversionSpecifier::ScanListArg: |
934 | return true; |
935 | default: |
936 | return false; |
937 | } |
938 | |
939 | case LengthModifier::AsMAllocate: |
940 | switch (CS.getKind()) { |
941 | case ConversionSpecifier::cArg: |
942 | case ConversionSpecifier::CArg: |
943 | case ConversionSpecifier::sArg: |
944 | case ConversionSpecifier::SArg: |
945 | case ConversionSpecifier::ScanListArg: |
946 | return true; |
947 | default: |
948 | return false; |
949 | } |
950 | case LengthModifier::AsInt32: |
951 | case LengthModifier::AsInt3264: |
952 | case LengthModifier::AsInt64: |
953 | switch (CS.getKind()) { |
954 | case ConversionSpecifier::dArg: |
955 | case ConversionSpecifier::iArg: |
956 | case ConversionSpecifier::oArg: |
957 | case ConversionSpecifier::uArg: |
958 | case ConversionSpecifier::xArg: |
959 | case ConversionSpecifier::XArg: |
960 | return Target.getTriple().isOSMSVCRT(); |
961 | default: |
962 | return false; |
963 | } |
964 | case LengthModifier::AsWide: |
965 | switch (CS.getKind()) { |
966 | case ConversionSpecifier::cArg: |
967 | case ConversionSpecifier::CArg: |
968 | case ConversionSpecifier::sArg: |
969 | case ConversionSpecifier::SArg: |
970 | case ConversionSpecifier::ZArg: |
971 | return Target.getTriple().isOSMSVCRT(); |
972 | default: |
973 | return false; |
974 | } |
975 | } |
976 | llvm_unreachable("Invalid LengthModifier Kind!" ); |
977 | } |
978 | |
979 | bool FormatSpecifier::hasStandardLengthModifier() const { |
980 | switch (LM.getKind()) { |
981 | case LengthModifier::None: |
982 | case LengthModifier::AsChar: |
983 | case LengthModifier::AsShort: |
984 | case LengthModifier::AsLong: |
985 | case LengthModifier::AsLongLong: |
986 | case LengthModifier::AsIntMax: |
987 | case LengthModifier::AsSizeT: |
988 | case LengthModifier::AsPtrDiff: |
989 | case LengthModifier::AsLongDouble: |
990 | return true; |
991 | case LengthModifier::AsAllocate: |
992 | case LengthModifier::AsMAllocate: |
993 | case LengthModifier::AsQuad: |
994 | case LengthModifier::AsInt32: |
995 | case LengthModifier::AsInt3264: |
996 | case LengthModifier::AsInt64: |
997 | case LengthModifier::AsWide: |
998 | case LengthModifier::AsShortLong: // ??? |
999 | return false; |
1000 | } |
1001 | llvm_unreachable("Invalid LengthModifier Kind!" ); |
1002 | } |
1003 | |
1004 | bool FormatSpecifier::hasStandardConversionSpecifier( |
1005 | const LangOptions &LangOpt) const { |
1006 | switch (CS.getKind()) { |
1007 | case ConversionSpecifier::bArg: |
1008 | case ConversionSpecifier::BArg: |
1009 | case ConversionSpecifier::cArg: |
1010 | case ConversionSpecifier::dArg: |
1011 | case ConversionSpecifier::iArg: |
1012 | case ConversionSpecifier::oArg: |
1013 | case ConversionSpecifier::uArg: |
1014 | case ConversionSpecifier::xArg: |
1015 | case ConversionSpecifier::XArg: |
1016 | case ConversionSpecifier::fArg: |
1017 | case ConversionSpecifier::FArg: |
1018 | case ConversionSpecifier::eArg: |
1019 | case ConversionSpecifier::EArg: |
1020 | case ConversionSpecifier::gArg: |
1021 | case ConversionSpecifier::GArg: |
1022 | case ConversionSpecifier::aArg: |
1023 | case ConversionSpecifier::AArg: |
1024 | case ConversionSpecifier::sArg: |
1025 | case ConversionSpecifier::pArg: |
1026 | case ConversionSpecifier::nArg: |
1027 | case ConversionSpecifier::ObjCObjArg: |
1028 | case ConversionSpecifier::ScanListArg: |
1029 | case ConversionSpecifier::PercentArg: |
1030 | case ConversionSpecifier::PArg: |
1031 | return true; |
1032 | case ConversionSpecifier::CArg: |
1033 | case ConversionSpecifier::SArg: |
1034 | return LangOpt.ObjC; |
1035 | case ConversionSpecifier::InvalidSpecifier: |
1036 | case ConversionSpecifier::FreeBSDbArg: |
1037 | case ConversionSpecifier::FreeBSDDArg: |
1038 | case ConversionSpecifier::FreeBSDrArg: |
1039 | case ConversionSpecifier::FreeBSDyArg: |
1040 | case ConversionSpecifier::PrintErrno: |
1041 | case ConversionSpecifier::DArg: |
1042 | case ConversionSpecifier::OArg: |
1043 | case ConversionSpecifier::UArg: |
1044 | case ConversionSpecifier::ZArg: |
1045 | return false; |
1046 | } |
1047 | llvm_unreachable("Invalid ConversionSpecifier Kind!" ); |
1048 | } |
1049 | |
1050 | bool FormatSpecifier::hasStandardLengthConversionCombination() const { |
1051 | if (LM.getKind() == LengthModifier::AsLongDouble) { |
1052 | switch(CS.getKind()) { |
1053 | case ConversionSpecifier::dArg: |
1054 | case ConversionSpecifier::iArg: |
1055 | case ConversionSpecifier::oArg: |
1056 | case ConversionSpecifier::uArg: |
1057 | case ConversionSpecifier::xArg: |
1058 | case ConversionSpecifier::XArg: |
1059 | return false; |
1060 | default: |
1061 | return true; |
1062 | } |
1063 | } |
1064 | return true; |
1065 | } |
1066 | |
1067 | std::optional<LengthModifier> |
1068 | FormatSpecifier::getCorrectedLengthModifier() const { |
1069 | if (CS.isAnyIntArg() || CS.getKind() == ConversionSpecifier::nArg) { |
1070 | if (LM.getKind() == LengthModifier::AsLongDouble || |
1071 | LM.getKind() == LengthModifier::AsQuad) { |
1072 | LengthModifier FixedLM(LM); |
1073 | FixedLM.setKind(LengthModifier::AsLongLong); |
1074 | return FixedLM; |
1075 | } |
1076 | } |
1077 | |
1078 | return std::nullopt; |
1079 | } |
1080 | |
1081 | bool FormatSpecifier::namedTypeToLengthModifier(QualType QT, |
1082 | LengthModifier &LM) { |
1083 | for (/**/; const auto *TT = QT->getAs<TypedefType>(); |
1084 | QT = TT->getDecl()->getUnderlyingType()) { |
1085 | const TypedefNameDecl *Typedef = TT->getDecl(); |
1086 | const IdentifierInfo *Identifier = Typedef->getIdentifier(); |
1087 | if (Identifier->getName() == "size_t" ) { |
1088 | LM.setKind(LengthModifier::AsSizeT); |
1089 | return true; |
1090 | } else if (Identifier->getName() == "ssize_t" ) { |
1091 | // Not C99, but common in Unix. |
1092 | LM.setKind(LengthModifier::AsSizeT); |
1093 | return true; |
1094 | } else if (Identifier->getName() == "intmax_t" ) { |
1095 | LM.setKind(LengthModifier::AsIntMax); |
1096 | return true; |
1097 | } else if (Identifier->getName() == "uintmax_t" ) { |
1098 | LM.setKind(LengthModifier::AsIntMax); |
1099 | return true; |
1100 | } else if (Identifier->getName() == "ptrdiff_t" ) { |
1101 | LM.setKind(LengthModifier::AsPtrDiff); |
1102 | return true; |
1103 | } |
1104 | } |
1105 | return false; |
1106 | } |
1107 | |