1//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12/// clang::Selector interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18
19#include "clang/Basic/LLVM.h"
20#include "clang/Basic/TokenKinds.h"
21#include "llvm/ADT/DenseMapInfo.h"
22#include "llvm/ADT/SmallString.h"
23#include "llvm/ADT/StringMap.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Support/Allocator.h"
26#include "llvm/Support/PointerLikeTypeTraits.h"
27#include "llvm/Support/type_traits.h"
28#include <cassert>
29#include <cstddef>
30#include <cstdint>
31#include <cstring>
32#include <string>
33#include <utility>
34
35namespace clang {
36
37class DeclarationName;
38class DeclarationNameTable;
39class IdentifierInfo;
40class LangOptions;
41class MultiKeywordSelector;
42class SourceLocation;
43
/// A simple pair of identifier info and location: \c first is the
/// (non-owning) IdentifierInfo pointer, \c second is the SourceLocation
/// associated with it.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;
46
/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
///
/// The value is used as the argument of \c alignas on those classes, so it
/// must stay a power of two; 8 == 1 << 3 is what frees up the three low bits.
enum { IdentifierInfoAlignment = 8 };
51
52/// One of these records is kept for each identifier that
53/// is lexed. This contains information about whether the token was \#define'd,
54/// is a language keyword, or if it is a front-end token of some sort (e.g. a
55/// variable or function name). The preprocessor keeps this information in a
56/// set, and all tok::identifier tokens have a pointer to one of these.
57/// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
58class alignas(IdentifierInfoAlignment) IdentifierInfo {
59 friend class IdentifierTable;
60
61 // Front-end token ID or tok::identifier.
62 unsigned TokenID : 9;
63
64 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
65 // First NUM_OBJC_KEYWORDS values are for Objective-C,
66 // the remaining values are for builtins.
67 unsigned ObjCOrBuiltinID : 13;
68
69 // True if there is a #define for this.
70 unsigned HasMacro : 1;
71
72 // True if there was a #define for this.
73 unsigned HadMacro : 1;
74
75 // True if the identifier is a language extension.
76 unsigned IsExtension : 1;
77
78 // True if the identifier is a keyword in a newer or proposed Standard.
79 unsigned IsFutureCompatKeyword : 1;
80
81 // True if the identifier is poisoned.
82 unsigned IsPoisoned : 1;
83
84 // True if the identifier is a C++ operator keyword.
85 unsigned IsCPPOperatorKeyword : 1;
86
87 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
88 // See comment about RecomputeNeedsHandleIdentifier for more info.
89 unsigned NeedsHandleIdentifier : 1;
90
91 // True if the identifier was loaded (at least partially) from an AST file.
92 unsigned IsFromAST : 1;
93
94 // True if the identifier has changed from the definition
95 // loaded from an AST file.
96 unsigned ChangedAfterLoad : 1;
97
98 // True if the identifier's frontend information has changed from the
99 // definition loaded from an AST file.
100 unsigned FEChangedAfterLoad : 1;
101
102 // True if revertTokenIDToIdentifier was called.
103 unsigned RevertedTokenID : 1;
104
105 // True if there may be additional information about
106 // this identifier stored externally.
107 unsigned OutOfDate : 1;
108
109 // True if this is the 'import' contextual keyword.
110 unsigned IsModulesImport : 1;
111
112 // 29 bits left in a 64-bit word.
113
114 // Managed by the language front-end.
115 void *FETokenInfo = nullptr;
116
117 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
118
119public:
120 IdentifierInfo();
121 IdentifierInfo(const IdentifierInfo &) = delete;
122 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
123
124 /// Return true if this is the identifier for the specified string.
125 ///
126 /// This is intended to be used for string literals only: II->isStr("foo").
127 template <std::size_t StrLen>
128 bool isStr(const char (&Str)[StrLen]) const {
129 return getLength() == StrLen-1 &&
130 memcmp(getNameStart(), Str, StrLen-1) == 0;
131 }
132
133 /// Return true if this is the identifier for the specified StringRef.
134 bool isStr(llvm::StringRef Str) const {
135 llvm::StringRef ThisStr(getNameStart(), getLength());
136 return ThisStr == Str;
137 }
138
139 /// Return the beginning of the actual null-terminated string for this
140 /// identifier.
141 const char *getNameStart() const {
142 if (Entry) return Entry->getKeyData();
143 // FIXME: This is gross. It would be best not to embed specific details
144 // of the PTH file format here.
145 // The 'this' pointer really points to a
146 // std::pair<IdentifierInfo, const char*>, where internal pointer
147 // points to the external string data.
148 using actualtype = std::pair<IdentifierInfo, const char *>;
149
150 return ((const actualtype*) this)->second;
151 }
152
153 /// Efficiently return the length of this identifier info.
154 unsigned getLength() const {
155 if (Entry) return Entry->getKeyLength();
156 // FIXME: This is gross. It would be best not to embed specific details
157 // of the PTH file format here.
158 // The 'this' pointer really points to a
159 // std::pair<IdentifierInfo, const char*>, where internal pointer
160 // points to the external string data.
161 using actualtype = std::pair<IdentifierInfo, const char *>;
162
163 const char* p = ((const actualtype*) this)->second - 2;
164 return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
165 }
166
167 /// Return the actual identifier string.
168 StringRef getName() const {
169 return StringRef(getNameStart(), getLength());
170 }
171
172 /// Return true if this identifier is \#defined to some other value.
173 /// \note The current definition may be in a module and not currently visible.
174 bool hasMacroDefinition() const {
175 return HasMacro;
176 }
177 void setHasMacroDefinition(bool Val) {
178 if (HasMacro == Val) return;
179
180 HasMacro = Val;
181 if (Val) {
182 NeedsHandleIdentifier = true;
183 HadMacro = true;
184 } else {
185 RecomputeNeedsHandleIdentifier();
186 }
187 }
188 /// Returns true if this identifier was \#defined to some value at any
189 /// moment. In this case there should be an entry for the identifier in the
190 /// macro history table in Preprocessor.
191 bool hadMacroDefinition() const {
192 return HadMacro;
193 }
194
195 /// If this is a source-language token (e.g. 'for'), this API
196 /// can be used to cause the lexer to map identifiers to source-language
197 /// tokens.
198 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
199
200 /// True if revertTokenIDToIdentifier() was called.
201 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
202
203 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
204 /// compatibility.
205 ///
206 /// TokenID is normally read-only but there are 2 instances where we revert it
207 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
208 /// using this method so we can inform serialization about it.
209 void revertTokenIDToIdentifier() {
210 assert(TokenID != tok::identifier && "Already at tok::identifier");
211 TokenID = tok::identifier;
212 RevertedTokenID = true;
213 }
214 void revertIdentifierToTokenID(tok::TokenKind TK) {
215 assert(TokenID == tok::identifier && "Should be at tok::identifier");
216 TokenID = TK;
217 RevertedTokenID = false;
218 }
219
220 /// Return the preprocessor keyword ID for this identifier.
221 ///
222 /// For example, "define" will return tok::pp_define.
223 tok::PPKeywordKind getPPKeywordID() const;
224
225 /// Return the Objective-C keyword ID for the this identifier.
226 ///
227 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
228 tok::ObjCKeywordKind getObjCKeywordID() const {
229 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
230 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
231 else
232 return tok::objc_not_keyword;
233 }
234 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
235
236 /// True if setNotBuiltin() was called.
237 bool hasRevertedBuiltin() const {
238 return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
239 }
240
241 /// Revert the identifier to a non-builtin identifier. We do this if
242 /// the name of a known builtin library function is used to declare that
243 /// function, but an unexpected type is specified.
244 void revertBuiltin() {
245 setBuiltinID(0);
246 }
247
248 /// Return a value indicating whether this is a builtin function.
249 ///
250 /// 0 is not-built-in. 1+ are specific builtin functions.
251 unsigned getBuiltinID() const {
252 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
253 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
254 else
255 return 0;
256 }
257 void setBuiltinID(unsigned ID) {
258 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
259 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
260 && "ID too large for field!");
261 }
262
263 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
264 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
265
266 /// get/setExtension - Initialize information about whether or not this
267 /// language token is an extension. This controls extension warnings, and is
268 /// only valid if a custom token ID is set.
269 bool isExtensionToken() const { return IsExtension; }
270 void setIsExtensionToken(bool Val) {
271 IsExtension = Val;
272 if (Val)
273 NeedsHandleIdentifier = true;
274 else
275 RecomputeNeedsHandleIdentifier();
276 }
277
278 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
279 /// this language token is a keyword in a newer or proposed Standard. This
280 /// controls compatibility warnings, and is only true when not parsing the
281 /// corresponding Standard. Once a compatibility problem has been diagnosed
282 /// with this keyword, the flag will be cleared.
283 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
284 void setIsFutureCompatKeyword(bool Val) {
285 IsFutureCompatKeyword = Val;
286 if (Val)
287 NeedsHandleIdentifier = true;
288 else
289 RecomputeNeedsHandleIdentifier();
290 }
291
292 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
293 /// Preprocessor will emit an error every time this token is used.
294 void setIsPoisoned(bool Value = true) {
295 IsPoisoned = Value;
296 if (Value)
297 NeedsHandleIdentifier = true;
298 else
299 RecomputeNeedsHandleIdentifier();
300 }
301
302 /// Return true if this token has been poisoned.
303 bool isPoisoned() const { return IsPoisoned; }
304
305 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
306 /// this identifier is a C++ alternate representation of an operator.
307 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
308 IsCPPOperatorKeyword = Val;
309 }
310 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
311
312 /// Return true if this token is a keyword in the specified language.
313 bool isKeyword(const LangOptions &LangOpts) const;
314
315 /// Return true if this token is a C++ keyword in the specified
316 /// language.
317 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
318
319 /// Get and set FETokenInfo. The language front-end is allowed to associate
320 /// arbitrary metadata with this token.
321 void *getFETokenInfo() const { return FETokenInfo; }
322 void setFETokenInfo(void *T) { FETokenInfo = T; }
323
324 /// Return true if the Preprocessor::HandleIdentifier must be called
325 /// on a token of this identifier.
326 ///
327 /// If this returns false, we know that HandleIdentifier will not affect
328 /// the token.
329 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
330
331 /// Return true if the identifier in its current state was loaded
332 /// from an AST file.
333 bool isFromAST() const { return IsFromAST; }
334
335 void setIsFromAST() { IsFromAST = true; }
336
337 /// Determine whether this identifier has changed since it was loaded
338 /// from an AST file.
339 bool hasChangedSinceDeserialization() const {
340 return ChangedAfterLoad;
341 }
342
343 /// Note that this identifier has changed since it was loaded from
344 /// an AST file.
345 void setChangedSinceDeserialization() {
346 ChangedAfterLoad = true;
347 }
348
349 /// Determine whether the frontend token information for this
350 /// identifier has changed since it was loaded from an AST file.
351 bool hasFETokenInfoChangedSinceDeserialization() const {
352 return FEChangedAfterLoad;
353 }
354
355 /// Note that the frontend token information for this identifier has
356 /// changed since it was loaded from an AST file.
357 void setFETokenInfoChangedSinceDeserialization() {
358 FEChangedAfterLoad = true;
359 }
360
361 /// Determine whether the information for this identifier is out of
362 /// date with respect to the external source.
363 bool isOutOfDate() const { return OutOfDate; }
364
365 /// Set whether the information for this identifier is out of
366 /// date with respect to the external source.
367 void setOutOfDate(bool OOD) {
368 OutOfDate = OOD;
369 if (OOD)
370 NeedsHandleIdentifier = true;
371 else
372 RecomputeNeedsHandleIdentifier();
373 }
374
375 /// Determine whether this is the contextual keyword \c import.
376 bool isModulesImport() const { return IsModulesImport; }
377
378 /// Set whether this identifier is the contextual keyword \c import.
379 void setModulesImport(bool I) {
380 IsModulesImport = I;
381 if (I)
382 NeedsHandleIdentifier = true;
383 else
384 RecomputeNeedsHandleIdentifier();
385 }
386
387 /// Return true if this identifier is an editor placeholder.
388 ///
389 /// Editor placeholders are produced by the code-completion engine and are
390 /// represented as characters between '<#' and '#>' in the source code. An
391 /// example of auto-completed call with a placeholder parameter is shown
392 /// below:
393 /// \code
394 /// function(<#int x#>);
395 /// \endcode
396 bool isEditorPlaceholder() const {
397 return getName().startswith("<#") && getName().endswith("#>");
398 }
399
400 /// Provide less than operator for lexicographical sorting.
401 bool operator<(const IdentifierInfo &RHS) const {
402 return getName() < RHS.getName();
403 }
404
405private:
406 /// The Preprocessor::HandleIdentifier does several special (but rare)
407 /// things to identifiers of various sorts. For example, it changes the
408 /// \c for keyword token from tok::identifier to tok::for.
409 ///
410 /// This method is very tied to the definition of HandleIdentifier. Any
411 /// change to it should be reflected here.
412 void RecomputeNeedsHandleIdentifier() {
413 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
414 isExtensionToken() || isFutureCompatKeyword() ||
415 isOutOfDate() || isModulesImport();
416 }
417};
418
419/// An RAII object for [un]poisoning an identifier within a scope.
420///
421/// \p II is allowed to be null, in which case objects of this type have
422/// no effect.
423class PoisonIdentifierRAIIObject {
424 IdentifierInfo *const II;
425 const bool OldValue;
426
427public:
428 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
429 : II(II), OldValue(II ? II->isPoisoned() : false) {
430 if(II)
431 II->setIsPoisoned(NewValue);
432 }
433
434 ~PoisonIdentifierRAIIObject() {
435 if(II)
436 II->setIsPoisoned(OldValue);
437 }
438};
439
440/// An iterator that walks over all of the known identifiers
441/// in the lookup table.
442///
443/// Since this iterator uses an abstract interface via virtual
444/// functions, it uses an object-oriented interface rather than the
445/// more standard C++ STL iterator interface. In this OO-style
446/// iteration, the single function \c Next() provides dereference,
447/// advance, and end-of-sequence checking in a single
448/// operation. Subclasses of this iterator type will provide the
449/// actual functionality.
450class IdentifierIterator {
451protected:
452 IdentifierIterator() = default;
453
454public:
455 IdentifierIterator(const IdentifierIterator &) = delete;
456 IdentifierIterator &operator=(const IdentifierIterator &) = delete;
457
458 virtual ~IdentifierIterator();
459
460 /// Retrieve the next string in the identifier table and
461 /// advances the iterator for the following string.
462 ///
463 /// \returns The next string in the identifier table. If there is
464 /// no such string, returns an empty \c StringRef.
465 virtual StringRef Next() = 0;
466};
467
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// This is an abstract interface: \c get() is pure virtual, while
/// \c getIdentifiers() is virtual with an out-of-line definition.
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
492
493/// Implements an efficient mapping from strings to IdentifierInfo nodes.
494///
495/// This has no other purpose, but this is an extremely performance-critical
496/// piece of the code, as each occurrence of every identifier goes through
497/// here when lexed.
498class IdentifierTable {
499 // Shark shows that using MallocAllocator is *much* slower than using this
500 // BumpPtrAllocator!
501 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
502 HashTableTy HashTable;
503
504 IdentifierInfoLookup* ExternalLookup;
505
506public:
507 /// Create the identifier table.
508 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
509
510 /// Create the identifier table, populating it with info about the
511 /// language keywords for the language specified by \p LangOpts.
512 explicit IdentifierTable(const LangOptions &LangOpts,
513 IdentifierInfoLookup *ExternalLookup = nullptr);
514
515 /// Set the external identifier lookup mechanism.
516 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
517 ExternalLookup = IILookup;
518 }
519
520 /// Retrieve the external identifier lookup object, if any.
521 IdentifierInfoLookup *getExternalIdentifierLookup() const {
522 return ExternalLookup;
523 }
524
525 llvm::BumpPtrAllocator& getAllocator() {
526 return HashTable.getAllocator();
527 }
528
529 /// Return the identifier token info for the specified named
530 /// identifier.
531 IdentifierInfo &get(StringRef Name) {
532 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
533
534 IdentifierInfo *&II = Entry.second;
535 if (II) return *II;
536
537 // No entry; if we have an external lookup, look there first.
538 if (ExternalLookup) {
539 II = ExternalLookup->get(Name);
540 if (II)
541 return *II;
542 }
543
544 // Lookups failed, make a new IdentifierInfo.
545 void *Mem = getAllocator().Allocate<IdentifierInfo>();
546 II = new (Mem) IdentifierInfo();
547
548 // Make sure getName() knows how to find the IdentifierInfo
549 // contents.
550 II->Entry = &Entry;
551
552 return *II;
553 }
554
555 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
556 IdentifierInfo &II = get(Name);
557 II.TokenID = TokenCode;
558 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
559 return II;
560 }
561
562 /// Gets an IdentifierInfo for the given name without consulting
563 /// external sources.
564 ///
565 /// This is a version of get() meant for external sources that want to
566 /// introduce or modify an identifier. If they called get(), they would
567 /// likely end up in a recursion.
568 IdentifierInfo &getOwn(StringRef Name) {
569 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
570
571 IdentifierInfo *&II = Entry.second;
572 if (II)
573 return *II;
574
575 // Lookups failed, make a new IdentifierInfo.
576 void *Mem = getAllocator().Allocate<IdentifierInfo>();
577 II = new (Mem) IdentifierInfo();
578
579 // Make sure getName() knows how to find the IdentifierInfo
580 // contents.
581 II->Entry = &Entry;
582
583 // If this is the 'import' contextual keyword, mark it as such.
584 if (Name.equals("import"))
585 II->setModulesImport(true);
586
587 return *II;
588 }
589
590 using iterator = HashTableTy::const_iterator;
591 using const_iterator = HashTableTy::const_iterator;
592
593 iterator begin() const { return HashTable.begin(); }
594 iterator end() const { return HashTable.end(); }
595 unsigned size() const { return HashTable.size(); }
596
597 /// Print some statistics to stderr that indicate how well the
598 /// hashing is doing.
599 void PrintStats() const;
600
601 /// Populate the identifier table with info about the language keywords
602 /// for the language specified by \p LangOpts.
603 void AddKeywords(const LangOptions &LangOpts);
604};
605
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
///
/// The enumerators are numbered consecutively from zero; the numbers are
/// spelled out so the range they occupy is visible at a glance.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None = 0,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc = 1,
  OMF_copy = 2,
  OMF_init = 3,
  OMF_mutableCopy = 4,
  OMF_new = 5,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease = 6,
  OMF_dealloc = 7,
  OMF_finalize = 8,
  OMF_release = 9,
  OMF_retain = 10,
  OMF_retainCount = 11,
  OMF_self = 12,
  OMF_initialize = 13,

  // performSelector families
  OMF_performSelector = 14
};
651
/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily.
///
/// This is the all-ones pattern in ObjCMethodFamilyBitWidth bits, i.e.
/// (1 << 4) - 1 == 15.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
658
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None = 0,
  OIT_Array = 1,
  OIT_Dictionary = 2,
  OIT_Singleton = 3,
  OIT_Init = 4,
  OIT_ReturnsSelf = 5
};
671
/// Classification returned by Selector::getStringFormatFamily().
enum ObjCStringFormatFamily {
  SFF_None = 0,
  SFF_NSString = 1,
  SFF_CFString = 2
};
677
678/// Smart pointer class that efficiently represents Objective-C method
679/// names.
680///
681/// This class will either point to an IdentifierInfo or a
682/// MultiKeywordSelector (which is private). This enables us to optimize
683/// selectors that take no arguments and selectors that take 1 argument, which
684/// accounts for 78% of all selectors in Cocoa.h.
685class Selector {
686 friend class Diagnostic;
687 friend class SelectorTable; // only the SelectorTable can create these
688 friend class DeclarationName; // and the AST's DeclarationName.
689
690 enum IdentifierInfoFlag {
691 // Empty selector = 0. Note that these enumeration values must
692 // correspond to the enumeration values of DeclarationName::StoredNameKind
693 ZeroArg = 0x01,
694 OneArg = 0x02,
695 MultiArg = 0x07,
696 ArgFlags = 0x07
697 };
698
699 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
700 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
701 /// case IdentifierInfo and MultiKeywordSelector are already aligned to
702 /// 8 bytes even on 32 bits archs because of DeclarationName.
703 uintptr_t InfoPtr = 0;
704
705 Selector(IdentifierInfo *II, unsigned nArgs) {
706 InfoPtr = reinterpret_cast<uintptr_t>(II);
707 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
708 assert(nArgs < 2 && "nArgs not equal to 0/1");
709 InfoPtr |= nArgs+1;
710 }
711
712 Selector(MultiKeywordSelector *SI) {
713 InfoPtr = reinterpret_cast<uintptr_t>(SI);
714 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
715 InfoPtr |= MultiArg;
716 }
717
718 IdentifierInfo *getAsIdentifierInfo() const {
719 if (getIdentifierInfoFlag() < MultiArg)
720 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
721 return nullptr;
722 }
723
724 MultiKeywordSelector *getMultiKeywordSelector() const {
725 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
726 }
727
728 unsigned getIdentifierInfoFlag() const {
729 return InfoPtr & ArgFlags;
730 }
731
732 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
733
734 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
735
736public:
737 /// The default ctor should only be used when creating data structures that
738 /// will contain selectors.
739 Selector() = default;
740 explicit Selector(uintptr_t V) : InfoPtr(V) {}
741
742 /// operator==/!= - Indicate whether the specified selectors are identical.
743 bool operator==(Selector RHS) const {
744 return InfoPtr == RHS.InfoPtr;
745 }
746 bool operator!=(Selector RHS) const {
747 return InfoPtr != RHS.InfoPtr;
748 }
749
750 void *getAsOpaquePtr() const {
751 return reinterpret_cast<void*>(InfoPtr);
752 }
753
754 /// Determine whether this is the empty selector.
755 bool isNull() const { return InfoPtr == 0; }
756
757 // Predicates to identify the selector type.
758 bool isKeywordSelector() const {
759 return getIdentifierInfoFlag() != ZeroArg;
760 }
761
762 bool isUnarySelector() const {
763 return getIdentifierInfoFlag() == ZeroArg;
764 }
765
766 unsigned getNumArgs() const;
767
768 /// Retrieve the identifier at a given position in the selector.
769 ///
770 /// Note that the identifier pointer returned may be NULL. Clients that only
771 /// care about the text of the identifier string, and not the specific,
772 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
773 /// an empty string when the identifier pointer would be NULL.
774 ///
775 /// \param argIndex The index for which we want to retrieve the identifier.
776 /// This index shall be less than \c getNumArgs() unless this is a keyword
777 /// selector, in which case 0 is the only permissible value.
778 ///
779 /// \returns the uniqued identifier for this slot, or NULL if this slot has
780 /// no corresponding identifier.
781 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
782
783 /// Retrieve the name at a given position in the selector.
784 ///
785 /// \param argIndex The index for which we want to retrieve the name.
786 /// This index shall be less than \c getNumArgs() unless this is a keyword
787 /// selector, in which case 0 is the only permissible value.
788 ///
789 /// \returns the name for this slot, which may be the empty string if no
790 /// name was supplied.
791 StringRef getNameForSlot(unsigned argIndex) const;
792
793 /// Derive the full selector name (e.g. "foo:bar:") and return
794 /// it as an std::string.
795 std::string getAsString() const;
796
797 /// Prints the full selector name (e.g. "foo:bar:").
798 void print(llvm::raw_ostream &OS) const;
799
800 void dump() const;
801
802 /// Derive the conventional family of this method.
803 ObjCMethodFamily getMethodFamily() const {
804 return getMethodFamilyImpl(*this);
805 }
806
807 ObjCStringFormatFamily getStringFormatFamily() const {
808 return getStringFormatFamilyImpl(*this);
809 }
810
811 static Selector getEmptyMarker() {
812 return Selector(uintptr_t(-1));
813 }
814
815 static Selector getTombstoneMarker() {
816 return Selector(uintptr_t(-2));
817 }
818
819 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
820};
821
/// This table allows us to fully hide how we implement
/// multi-keyword caching.
///
/// The table is not copyable. Zero- and one-argument selectors are built
/// directly from an IdentifierInfo by the inline helpers below and do not
/// touch \c Impl.
class SelectorTable {
  // Actually a SelectorTableImpl
  void *Impl;

public:
  SelectorTable();
  SelectorTable(const SelectorTable &) = delete;
  SelectorTable &operator=(const SelectorTable &) = delete;
  ~SelectorTable();

  /// Can create any sort of selector.
  ///
  /// \p NumArgs indicates whether this is a no argument selector "foo", a
  /// single argument selector "foo:" or multi-argument "foo:bar:".
  Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);

  /// Build the one-argument selector named by \p ID.
  Selector getUnarySelector(IdentifierInfo *ID) {
    return Selector(ID, 1);
  }

  /// Build the zero-argument selector named by \p ID.
  Selector getNullarySelector(IdentifierInfo *ID) {
    return Selector(ID, 0);
  }

  /// Return the total amount of memory allocated for managing selectors.
  size_t getTotalMemory() const;

  /// Return the default setter name for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static SmallString<64> constructSetterName(StringRef Name);

  /// Return the default setter selector for the given identifier.
  ///
  /// This is "set" + \p Name where the initial character of \p Name
  /// has been capitalized.
  static Selector constructSetterSelector(IdentifierTable &Idents,
                                          SelectorTable &SelTable,
                                          const IdentifierInfo *Name);

  /// Return the property name for the given setter selector.
  static std::string getPropertyNameFromSetterSelector(Selector Sel);
};
868
869namespace detail {
870
871/// DeclarationNameExtra is used as a base of various uncommon special names.
872/// This class is needed since DeclarationName has not enough space to store
873/// the kind of every possible names. Therefore the kind of common names is
874/// stored directly in DeclarationName, and the kind of uncommon names is
875/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
876/// DeclarationName needs the lower 3 bits to store the kind of common names.
877/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
878/// here is very likely to require changes in DeclarationName(Table).
879class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
880 friend class clang::DeclarationName;
881 friend class clang::DeclarationNameTable;
882
883protected:
884 /// The kind of "extra" information stored in the DeclarationName. See
885 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
886 /// are used. Note that DeclarationName depends on the numerical values
887 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
888 /// for more info.
889 enum ExtraKind {
890 CXXDeductionGuideName,
891 CXXLiteralOperatorName,
892 CXXUsingDirective,
893 ObjCMultiArgSelector
894 };
895
896 /// ExtraKindOrNumArgs has one of the following meaning:
897 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
898 /// is in this case in fact either a CXXDeductionGuideNameExtra or
899 /// a CXXLiteralOperatorIdName.
900 ///
901 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
902 ///
903 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
904 /// the number of arguments in the Objective-C selector, in which
905 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
906 unsigned ExtraKindOrNumArgs;
907
908 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
909 DeclarationNameExtra(unsigned NumArgs)
910 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
911
912 /// Return the corresponding ExtraKind.
913 ExtraKind getKind() const {
914 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
915 (unsigned)ObjCMultiArgSelector
916 ? (unsigned)ObjCMultiArgSelector
917 : ExtraKindOrNumArgs);
918 }
919
920 /// Return the number of arguments in an ObjC selector. Only valid when this
921 /// is indeed an ObjCMultiArgSelector.
922 unsigned getNumArgs() const {
923 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
924 "getNumArgs called but this is not an ObjC selector!");
925 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
926 }
927};
928
929} // namespace detail
930
931} // namespace clang
932
933namespace llvm {
934
/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
/// DenseSets.
///
/// The empty and tombstone keys are the two sentinel selectors provided by
/// clang::Selector; equality is Selector's own operator==. The hash function
/// is defined out of line.
template <>
struct DenseMapInfo<clang::Selector> {
  static clang::Selector getEmptyKey() {
    return clang::Selector::getEmptyMarker();
  }

  static clang::Selector getTombstoneKey() {
    return clang::Selector::getTombstoneMarker();
  }

  static unsigned getHashValue(clang::Selector S);

  static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
    return LHS == RHS;
  }
};
953
// Mark clang::Selector as POD-like; its only data member is a single
// uintptr_t.
template <>
struct isPodLike<clang::Selector> { static const bool value = true; };
956
// Let a clang::Selector be converted to and from a void pointer by
// round-tripping its raw word through getAsOpaquePtr() and the
// Selector(uintptr_t) constructor.
template<>
struct PointerLikeTypeTraits<clang::Selector> {
  static const void *getAsVoidPointer(clang::Selector P) {
    return P.getAsOpaquePtr();
  }

  static clang::Selector getFromVoidPointer(const void *P) {
    return clang::Selector(reinterpret_cast<uintptr_t>(P));
  }

  // Selector already keeps its own flag in the low three bits of the word,
  // so none are left over for clients.
  enum { NumLowBitsAvailable = 0 };
};
969
// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
// are not guaranteed to be 8-byte aligned.
//
// NOTE(review): IdentifierInfo is declared alignas(IdentifierInfoAlignment)
// in this header, yet only one low bit is advertised here. Confirm whether
// this is intentionally conservative before relying on, or raising, the value.
template<>
struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
  static void *getAsVoidPointer(clang::IdentifierInfo* P) {
    return P;
  }

  static clang::IdentifierInfo *getFromVoidPointer(void *P) {
    return static_cast<clang::IdentifierInfo*>(P);
  }

  enum { NumLowBitsAvailable = 1 };
};
984
// Const-pointer counterpart of the IdentifierInfo* traits: the same
// conversions through const void*, and the same single available low bit.
template<>
struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
  static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
    return P;
  }

  static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
    return static_cast<const clang::IdentifierInfo*>(P);
  }

  enum { NumLowBitsAvailable = 1 };
};
997
998} // namespace llvm
999
1000#endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1001