1//===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11/// clang::Selector interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17
18#include "clang/Basic/LLVM.h"
19#include "clang/Basic/TokenKinds.h"
20#include "llvm/ADT/DenseMapInfo.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringMap.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Support/Allocator.h"
25#include "llvm/Support/PointerLikeTypeTraits.h"
26#include "llvm/Support/type_traits.h"
27#include <cassert>
28#include <cstddef>
29#include <cstdint>
30#include <cstring>
31#include <string>
32#include <utility>
33
34namespace clang {
35
// Forward declarations for types this header only names.
class DeclarationName;
class DeclarationNameTable;
class IdentifierInfo;
class LangOptions;
class MultiKeywordSelector;
class SourceLocation;

/// An identifier paired with a source location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// Alignment, in bytes, applied to IdentifierInfo and the related classes.
///
/// With 8-byte alignment the lower 3 bits of a pointer to one of these
/// classes are free, and DeclarationName uses them.
enum {
  IdentifierInfoAlignment = 8
};
50
51/// One of these records is kept for each identifier that
52/// is lexed. This contains information about whether the token was \#define'd,
53/// is a language keyword, or if it is a front-end token of some sort (e.g. a
54/// variable or function name). The preprocessor keeps this information in a
55/// set, and all tok::identifier tokens have a pointer to one of these.
56/// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
57class alignas(IdentifierInfoAlignment) IdentifierInfo {
58 friend class IdentifierTable;
59
60 // Front-end token ID or tok::identifier.
61 unsigned TokenID : 9;
62
63 // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
64 // First NUM_OBJC_KEYWORDS values are for Objective-C,
65 // the remaining values are for builtins.
66 unsigned ObjCOrBuiltinID : 13;
67
68 // True if there is a #define for this.
69 unsigned HasMacro : 1;
70
71 // True if there was a #define for this.
72 unsigned HadMacro : 1;
73
74 // True if the identifier is a language extension.
75 unsigned IsExtension : 1;
76
77 // True if the identifier is a keyword in a newer or proposed Standard.
78 unsigned IsFutureCompatKeyword : 1;
79
80 // True if the identifier is poisoned.
81 unsigned IsPoisoned : 1;
82
83 // True if the identifier is a C++ operator keyword.
84 unsigned IsCPPOperatorKeyword : 1;
85
86 // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
87 // See comment about RecomputeNeedsHandleIdentifier for more info.
88 unsigned NeedsHandleIdentifier : 1;
89
90 // True if the identifier was loaded (at least partially) from an AST file.
91 unsigned IsFromAST : 1;
92
93 // True if the identifier has changed from the definition
94 // loaded from an AST file.
95 unsigned ChangedAfterLoad : 1;
96
97 // True if the identifier's frontend information has changed from the
98 // definition loaded from an AST file.
99 unsigned FEChangedAfterLoad : 1;
100
101 // True if revertTokenIDToIdentifier was called.
102 unsigned RevertedTokenID : 1;
103
104 // True if there may be additional information about
105 // this identifier stored externally.
106 unsigned OutOfDate : 1;
107
108 // True if this is the 'import' contextual keyword.
109 unsigned IsModulesImport : 1;
110
111 // 29 bits left in a 64-bit word.
112
113 // Managed by the language front-end.
114 void *FETokenInfo = nullptr;
115
116 llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
117
118 IdentifierInfo()
119 : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
120 HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
121 IsPoisoned(false), IsCPPOperatorKeyword(false),
122 NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
123 FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
124 IsModulesImport(false) {}
125
126public:
127 IdentifierInfo(const IdentifierInfo &) = delete;
128 IdentifierInfo &operator=(const IdentifierInfo &) = delete;
129 IdentifierInfo(IdentifierInfo &&) = delete;
130 IdentifierInfo &operator=(IdentifierInfo &&) = delete;
131
132 /// Return true if this is the identifier for the specified string.
133 ///
134 /// This is intended to be used for string literals only: II->isStr("foo").
135 template <std::size_t StrLen>
136 bool isStr(const char (&Str)[StrLen]) const {
137 return getLength() == StrLen-1 &&
138 memcmp(getNameStart(), Str, StrLen-1) == 0;
139 }
140
141 /// Return true if this is the identifier for the specified StringRef.
142 bool isStr(llvm::StringRef Str) const {
143 llvm::StringRef ThisStr(getNameStart(), getLength());
144 return ThisStr == Str;
145 }
146
147 /// Return the beginning of the actual null-terminated string for this
148 /// identifier.
149 const char *getNameStart() const { return Entry->getKeyData(); }
150
151 /// Efficiently return the length of this identifier info.
152 unsigned getLength() const { return Entry->getKeyLength(); }
153
154 /// Return the actual identifier string.
155 StringRef getName() const {
156 return StringRef(getNameStart(), getLength());
157 }
158
159 /// Return true if this identifier is \#defined to some other value.
160 /// \note The current definition may be in a module and not currently visible.
161 bool hasMacroDefinition() const {
162 return HasMacro;
163 }
164 void setHasMacroDefinition(bool Val) {
165 if (HasMacro == Val) return;
166
167 HasMacro = Val;
168 if (Val) {
169 NeedsHandleIdentifier = true;
170 HadMacro = true;
171 } else {
172 RecomputeNeedsHandleIdentifier();
173 }
174 }
175 /// Returns true if this identifier was \#defined to some value at any
176 /// moment. In this case there should be an entry for the identifier in the
177 /// macro history table in Preprocessor.
178 bool hadMacroDefinition() const {
179 return HadMacro;
180 }
181
182 /// If this is a source-language token (e.g. 'for'), this API
183 /// can be used to cause the lexer to map identifiers to source-language
184 /// tokens.
185 tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
186
187 /// True if revertTokenIDToIdentifier() was called.
188 bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
189
190 /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
191 /// compatibility.
192 ///
193 /// TokenID is normally read-only but there are 2 instances where we revert it
194 /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
195 /// using this method so we can inform serialization about it.
196 void revertTokenIDToIdentifier() {
197 assert(TokenID != tok::identifier && "Already at tok::identifier");
198 TokenID = tok::identifier;
199 RevertedTokenID = true;
200 }
201 void revertIdentifierToTokenID(tok::TokenKind TK) {
202 assert(TokenID == tok::identifier && "Should be at tok::identifier");
203 TokenID = TK;
204 RevertedTokenID = false;
205 }
206
207 /// Return the preprocessor keyword ID for this identifier.
208 ///
209 /// For example, "define" will return tok::pp_define.
210 tok::PPKeywordKind getPPKeywordID() const;
211
212 /// Return the Objective-C keyword ID for the this identifier.
213 ///
214 /// For example, 'class' will return tok::objc_class if ObjC is enabled.
215 tok::ObjCKeywordKind getObjCKeywordID() const {
216 if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
217 return tok::ObjCKeywordKind(ObjCOrBuiltinID);
218 else
219 return tok::objc_not_keyword;
220 }
221 void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
222
223 /// True if setNotBuiltin() was called.
224 bool hasRevertedBuiltin() const {
225 return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
226 }
227
228 /// Revert the identifier to a non-builtin identifier. We do this if
229 /// the name of a known builtin library function is used to declare that
230 /// function, but an unexpected type is specified.
231 void revertBuiltin() {
232 setBuiltinID(0);
233 }
234
235 /// Return a value indicating whether this is a builtin function.
236 ///
237 /// 0 is not-built-in. 1+ are specific builtin functions.
238 unsigned getBuiltinID() const {
239 if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
240 return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
241 else
242 return 0;
243 }
244 void setBuiltinID(unsigned ID) {
245 ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
246 assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
247 && "ID too large for field!");
248 }
249
250 unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
251 void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
252
253 /// get/setExtension - Initialize information about whether or not this
254 /// language token is an extension. This controls extension warnings, and is
255 /// only valid if a custom token ID is set.
256 bool isExtensionToken() const { return IsExtension; }
257 void setIsExtensionToken(bool Val) {
258 IsExtension = Val;
259 if (Val)
260 NeedsHandleIdentifier = true;
261 else
262 RecomputeNeedsHandleIdentifier();
263 }
264
265 /// is/setIsFutureCompatKeyword - Initialize information about whether or not
266 /// this language token is a keyword in a newer or proposed Standard. This
267 /// controls compatibility warnings, and is only true when not parsing the
268 /// corresponding Standard. Once a compatibility problem has been diagnosed
269 /// with this keyword, the flag will be cleared.
270 bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
271 void setIsFutureCompatKeyword(bool Val) {
272 IsFutureCompatKeyword = Val;
273 if (Val)
274 NeedsHandleIdentifier = true;
275 else
276 RecomputeNeedsHandleIdentifier();
277 }
278
279 /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the
280 /// Preprocessor will emit an error every time this token is used.
281 void setIsPoisoned(bool Value = true) {
282 IsPoisoned = Value;
283 if (Value)
284 NeedsHandleIdentifier = true;
285 else
286 RecomputeNeedsHandleIdentifier();
287 }
288
289 /// Return true if this token has been poisoned.
290 bool isPoisoned() const { return IsPoisoned; }
291
292 /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
293 /// this identifier is a C++ alternate representation of an operator.
294 void setIsCPlusPlusOperatorKeyword(bool Val = true) {
295 IsCPPOperatorKeyword = Val;
296 }
297 bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
298
299 /// Return true if this token is a keyword in the specified language.
300 bool isKeyword(const LangOptions &LangOpts) const;
301
302 /// Return true if this token is a C++ keyword in the specified
303 /// language.
304 bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
305
306 /// Get and set FETokenInfo. The language front-end is allowed to associate
307 /// arbitrary metadata with this token.
308 void *getFETokenInfo() const { return FETokenInfo; }
309 void setFETokenInfo(void *T) { FETokenInfo = T; }
310
311 /// Return true if the Preprocessor::HandleIdentifier must be called
312 /// on a token of this identifier.
313 ///
314 /// If this returns false, we know that HandleIdentifier will not affect
315 /// the token.
316 bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
317
318 /// Return true if the identifier in its current state was loaded
319 /// from an AST file.
320 bool isFromAST() const { return IsFromAST; }
321
322 void setIsFromAST() { IsFromAST = true; }
323
324 /// Determine whether this identifier has changed since it was loaded
325 /// from an AST file.
326 bool hasChangedSinceDeserialization() const {
327 return ChangedAfterLoad;
328 }
329
330 /// Note that this identifier has changed since it was loaded from
331 /// an AST file.
332 void setChangedSinceDeserialization() {
333 ChangedAfterLoad = true;
334 }
335
336 /// Determine whether the frontend token information for this
337 /// identifier has changed since it was loaded from an AST file.
338 bool hasFETokenInfoChangedSinceDeserialization() const {
339 return FEChangedAfterLoad;
340 }
341
342 /// Note that the frontend token information for this identifier has
343 /// changed since it was loaded from an AST file.
344 void setFETokenInfoChangedSinceDeserialization() {
345 FEChangedAfterLoad = true;
346 }
347
348 /// Determine whether the information for this identifier is out of
349 /// date with respect to the external source.
350 bool isOutOfDate() const { return OutOfDate; }
351
352 /// Set whether the information for this identifier is out of
353 /// date with respect to the external source.
354 void setOutOfDate(bool OOD) {
355 OutOfDate = OOD;
356 if (OOD)
357 NeedsHandleIdentifier = true;
358 else
359 RecomputeNeedsHandleIdentifier();
360 }
361
362 /// Determine whether this is the contextual keyword \c import.
363 bool isModulesImport() const { return IsModulesImport; }
364
365 /// Set whether this identifier is the contextual keyword \c import.
366 void setModulesImport(bool I) {
367 IsModulesImport = I;
368 if (I)
369 NeedsHandleIdentifier = true;
370 else
371 RecomputeNeedsHandleIdentifier();
372 }
373
374 /// Return true if this identifier is an editor placeholder.
375 ///
376 /// Editor placeholders are produced by the code-completion engine and are
377 /// represented as characters between '<#' and '#>' in the source code. An
378 /// example of auto-completed call with a placeholder parameter is shown
379 /// below:
380 /// \code
381 /// function(<#int x#>);
382 /// \endcode
383 bool isEditorPlaceholder() const {
384 return getName().startswith("<#") && getName().endswith("#>");
385 }
386
387 /// Provide less than operator for lexicographical sorting.
388 bool operator<(const IdentifierInfo &RHS) const {
389 return getName() < RHS.getName();
390 }
391
392private:
393 /// The Preprocessor::HandleIdentifier does several special (but rare)
394 /// things to identifiers of various sorts. For example, it changes the
395 /// \c for keyword token from tok::identifier to tok::for.
396 ///
397 /// This method is very tied to the definition of HandleIdentifier. Any
398 /// change to it should be reflected here.
399 void RecomputeNeedsHandleIdentifier() {
400 NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
401 isExtensionToken() || isFutureCompatKeyword() ||
402 isOutOfDate() || isModulesImport();
403 }
404};
405
406/// An RAII object for [un]poisoning an identifier within a scope.
407///
408/// \p II is allowed to be null, in which case objects of this type have
409/// no effect.
410class PoisonIdentifierRAIIObject {
411 IdentifierInfo *const II;
412 const bool OldValue;
413
414public:
415 PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
416 : II(II), OldValue(II ? II->isPoisoned() : false) {
417 if(II)
418 II->setIsPoisoned(NewValue);
419 }
420
421 ~PoisonIdentifierRAIIObject() {
422 if(II)
423 II->setIsPoisoned(OldValue);
424 }
425};
426
427/// An iterator that walks over all of the known identifiers
428/// in the lookup table.
429///
430/// Since this iterator uses an abstract interface via virtual
431/// functions, it uses an object-oriented interface rather than the
432/// more standard C++ STL iterator interface. In this OO-style
433/// iteration, the single function \c Next() provides dereference,
434/// advance, and end-of-sequence checking in a single
435/// operation. Subclasses of this iterator type will provide the
436/// actual functionality.
437class IdentifierIterator {
438protected:
439 IdentifierIterator() = default;
440
441public:
442 IdentifierIterator(const IdentifierIterator &) = delete;
443 IdentifierIterator &operator=(const IdentifierIterator &) = delete;
444
445 virtual ~IdentifierIterator();
446
447 /// Retrieve the next string in the identifier table and
448 /// advances the iterator for the following string.
449 ///
450 /// \returns The next string in the identifier table. If there is
451 /// no such string, returns an empty \c StringRef.
452 virtual StringRef Next() = 0;
453};
454
455/// Provides lookups to, and iteration over, IdentiferInfo objects.
456class IdentifierInfoLookup {
457public:
458 virtual ~IdentifierInfoLookup();
459
460 /// Return the IdentifierInfo for the specified named identifier.
461 ///
462 /// Unlike the version in IdentifierTable, this returns a pointer instead
463 /// of a reference. If the pointer is null then the IdentifierInfo cannot
464 /// be found.
465 virtual IdentifierInfo* get(StringRef Name) = 0;
466
467 /// Retrieve an iterator into the set of all identifiers
468 /// known to this identifier lookup source.
469 ///
470 /// This routine provides access to all of the identifiers known to
471 /// the identifier lookup, allowing access to the contents of the
472 /// identifiers without introducing the overhead of constructing
473 /// IdentifierInfo objects for each.
474 ///
475 /// \returns A new iterator into the set of known identifiers. The
476 /// caller is responsible for deleting this iterator.
477 virtual IdentifierIterator *getIdentifiers();
478};
479
480/// Implements an efficient mapping from strings to IdentifierInfo nodes.
481///
482/// This has no other purpose, but this is an extremely performance-critical
483/// piece of the code, as each occurrence of every identifier goes through
484/// here when lexed.
485class IdentifierTable {
486 // Shark shows that using MallocAllocator is *much* slower than using this
487 // BumpPtrAllocator!
488 using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
489 HashTableTy HashTable;
490
491 IdentifierInfoLookup* ExternalLookup;
492
493public:
494 /// Create the identifier table.
495 explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
496
497 /// Create the identifier table, populating it with info about the
498 /// language keywords for the language specified by \p LangOpts.
499 explicit IdentifierTable(const LangOptions &LangOpts,
500 IdentifierInfoLookup *ExternalLookup = nullptr);
501
502 /// Set the external identifier lookup mechanism.
503 void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
504 ExternalLookup = IILookup;
505 }
506
507 /// Retrieve the external identifier lookup object, if any.
508 IdentifierInfoLookup *getExternalIdentifierLookup() const {
509 return ExternalLookup;
510 }
511
512 llvm::BumpPtrAllocator& getAllocator() {
513 return HashTable.getAllocator();
514 }
515
516 /// Return the identifier token info for the specified named
517 /// identifier.
518 IdentifierInfo &get(StringRef Name) {
519 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
520
521 IdentifierInfo *&II = Entry.second;
522 if (II) return *II;
523
524 // No entry; if we have an external lookup, look there first.
525 if (ExternalLookup) {
526 II = ExternalLookup->get(Name);
527 if (II)
528 return *II;
529 }
530
531 // Lookups failed, make a new IdentifierInfo.
532 void *Mem = getAllocator().Allocate<IdentifierInfo>();
533 II = new (Mem) IdentifierInfo();
534
535 // Make sure getName() knows how to find the IdentifierInfo
536 // contents.
537 II->Entry = &Entry;
538
539 return *II;
540 }
541
542 IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
543 IdentifierInfo &II = get(Name);
544 II.TokenID = TokenCode;
545 assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
546 return II;
547 }
548
549 /// Gets an IdentifierInfo for the given name without consulting
550 /// external sources.
551 ///
552 /// This is a version of get() meant for external sources that want to
553 /// introduce or modify an identifier. If they called get(), they would
554 /// likely end up in a recursion.
555 IdentifierInfo &getOwn(StringRef Name) {
556 auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
557
558 IdentifierInfo *&II = Entry.second;
559 if (II)
560 return *II;
561
562 // Lookups failed, make a new IdentifierInfo.
563 void *Mem = getAllocator().Allocate<IdentifierInfo>();
564 II = new (Mem) IdentifierInfo();
565
566 // Make sure getName() knows how to find the IdentifierInfo
567 // contents.
568 II->Entry = &Entry;
569
570 // If this is the 'import' contextual keyword, mark it as such.
571 if (Name.equals("import"))
572 II->setModulesImport(true);
573
574 return *II;
575 }
576
577 using iterator = HashTableTy::const_iterator;
578 using const_iterator = HashTableTy::const_iterator;
579
580 iterator begin() const { return HashTable.begin(); }
581 iterator end() const { return HashTable.end(); }
582 unsigned size() const { return HashTable.size(); }
583
584 /// Print some statistics to stderr that indicate how well the
585 /// hashing is doing.
586 void PrintStats() const;
587
588 /// Populate the identifier table with info about the language keywords
589 /// for the language specified by \p LangOpts.
590 void AddKeywords(const LangOptions &LangOpts);
591};
592
/// A family of Objective-C methods.
///
/// The language attaches no inherent meaning to these families, yet they
/// are central enough in the existing implementations to merit direct AST
/// support. Arbitrary methods could in theory be grouped into families;
/// the focus here is on the methods involved in allocation and
/// retain-count management, as these are the most "core" and the most
/// likely to be useful to diverse clients without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families. A method family may impose restrictions beyond the
/// selector name; for example, a method called '_init' that returns void
/// is not considered to be in the 'init' family (but would be if it
/// returned 'id'). A method's family can also be changed or removed
/// explicitly. Therefore the method's family should be considered the
/// single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Families whose selectors may have arbitrary arity, may be written
  // with arbitrary leading underscores, and may carry additional
  // CamelCase "words" in their first selector chunk after the family
  // name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // Singleton families: each consists only of the nullary selector
  // with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};
638
/// Number of bits sufficient to store any enumerator of ObjCMethodFamily
/// as well as InvalidObjCMethodFamily.
enum {
  ObjCMethodFamilyBitWidth = 4
};

/// An invalid value of ObjCMethodFamily: all ObjCMethodFamilyBitWidth bits
/// set.
enum {
  InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1
};
645
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
658
/// String-format families a selector can be classified into; see
/// Selector::getStringFormatFamily().
enum ObjCStringFormatFamily {
  SFF_None,     // not classified into either family below
  SFF_NSString,
  SFF_CFString
};
664
665/// Smart pointer class that efficiently represents Objective-C method
666/// names.
667///
668/// This class will either point to an IdentifierInfo or a
669/// MultiKeywordSelector (which is private). This enables us to optimize
670/// selectors that take no arguments and selectors that take 1 argument, which
671/// accounts for 78% of all selectors in Cocoa.h.
672class Selector {
673 friend class Diagnostic;
674 friend class SelectorTable; // only the SelectorTable can create these
675 friend class DeclarationName; // and the AST's DeclarationName.
676
677 enum IdentifierInfoFlag {
678 // Empty selector = 0. Note that these enumeration values must
679 // correspond to the enumeration values of DeclarationName::StoredNameKind
680 ZeroArg = 0x01,
681 OneArg = 0x02,
682 MultiArg = 0x07,
683 ArgFlags = 0x07
684 };
685
686 /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
687 /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
688 /// case IdentifierInfo and MultiKeywordSelector are already aligned to
689 /// 8 bytes even on 32 bits archs because of DeclarationName.
690 uintptr_t InfoPtr = 0;
691
692 Selector(IdentifierInfo *II, unsigned nArgs) {
693 InfoPtr = reinterpret_cast<uintptr_t>(II);
694 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
695 assert(nArgs < 2 && "nArgs not equal to 0/1");
696 InfoPtr |= nArgs+1;
697 }
698
699 Selector(MultiKeywordSelector *SI) {
700 InfoPtr = reinterpret_cast<uintptr_t>(SI);
701 assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
702 InfoPtr |= MultiArg;
703 }
704
705 IdentifierInfo *getAsIdentifierInfo() const {
706 if (getIdentifierInfoFlag() < MultiArg)
707 return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
708 return nullptr;
709 }
710
711 MultiKeywordSelector *getMultiKeywordSelector() const {
712 return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
713 }
714
715 unsigned getIdentifierInfoFlag() const {
716 return InfoPtr & ArgFlags;
717 }
718
719 static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
720
721 static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
722
723public:
724 /// The default ctor should only be used when creating data structures that
725 /// will contain selectors.
726 Selector() = default;
727 explicit Selector(uintptr_t V) : InfoPtr(V) {}
728
729 /// operator==/!= - Indicate whether the specified selectors are identical.
730 bool operator==(Selector RHS) const {
731 return InfoPtr == RHS.InfoPtr;
732 }
733 bool operator!=(Selector RHS) const {
734 return InfoPtr != RHS.InfoPtr;
735 }
736
737 void *getAsOpaquePtr() const {
738 return reinterpret_cast<void*>(InfoPtr);
739 }
740
741 /// Determine whether this is the empty selector.
742 bool isNull() const { return InfoPtr == 0; }
743
744 // Predicates to identify the selector type.
745 bool isKeywordSelector() const {
746 return getIdentifierInfoFlag() != ZeroArg;
747 }
748
749 bool isUnarySelector() const {
750 return getIdentifierInfoFlag() == ZeroArg;
751 }
752
753 unsigned getNumArgs() const;
754
755 /// Retrieve the identifier at a given position in the selector.
756 ///
757 /// Note that the identifier pointer returned may be NULL. Clients that only
758 /// care about the text of the identifier string, and not the specific,
759 /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
760 /// an empty string when the identifier pointer would be NULL.
761 ///
762 /// \param argIndex The index for which we want to retrieve the identifier.
763 /// This index shall be less than \c getNumArgs() unless this is a keyword
764 /// selector, in which case 0 is the only permissible value.
765 ///
766 /// \returns the uniqued identifier for this slot, or NULL if this slot has
767 /// no corresponding identifier.
768 IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
769
770 /// Retrieve the name at a given position in the selector.
771 ///
772 /// \param argIndex The index for which we want to retrieve the name.
773 /// This index shall be less than \c getNumArgs() unless this is a keyword
774 /// selector, in which case 0 is the only permissible value.
775 ///
776 /// \returns the name for this slot, which may be the empty string if no
777 /// name was supplied.
778 StringRef getNameForSlot(unsigned argIndex) const;
779
780 /// Derive the full selector name (e.g. "foo:bar:") and return
781 /// it as an std::string.
782 std::string getAsString() const;
783
784 /// Prints the full selector name (e.g. "foo:bar:").
785 void print(llvm::raw_ostream &OS) const;
786
787 void dump() const;
788
789 /// Derive the conventional family of this method.
790 ObjCMethodFamily getMethodFamily() const {
791 return getMethodFamilyImpl(*this);
792 }
793
794 ObjCStringFormatFamily getStringFormatFamily() const {
795 return getStringFormatFamilyImpl(*this);
796 }
797
798 static Selector getEmptyMarker() {
799 return Selector(uintptr_t(-1));
800 }
801
802 static Selector getTombstoneMarker() {
803 return Selector(uintptr_t(-2));
804 }
805
806 static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
807};
808
809/// This table allows us to fully hide how we implement
810/// multi-keyword caching.
811class SelectorTable {
812 // Actually a SelectorTableImpl
813 void *Impl;
814
815public:
816 SelectorTable();
817 SelectorTable(const SelectorTable &) = delete;
818 SelectorTable &operator=(const SelectorTable &) = delete;
819 ~SelectorTable();
820
821 /// Can create any sort of selector.
822 ///
823 /// \p NumArgs indicates whether this is a no argument selector "foo", a
824 /// single argument selector "foo:" or multi-argument "foo:bar:".
825 Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
826
827 Selector getUnarySelector(IdentifierInfo *ID) {
828 return Selector(ID, 1);
829 }
830
831 Selector getNullarySelector(IdentifierInfo *ID) {
832 return Selector(ID, 0);
833 }
834
835 /// Return the total amount of memory allocated for managing selectors.
836 size_t getTotalMemory() const;
837
838 /// Return the default setter name for the given identifier.
839 ///
840 /// This is "set" + \p Name where the initial character of \p Name
841 /// has been capitalized.
842 static SmallString<64> constructSetterName(StringRef Name);
843
844 /// Return the default setter selector for the given identifier.
845 ///
846 /// This is "set" + \p Name where the initial character of \p Name
847 /// has been capitalized.
848 static Selector constructSetterSelector(IdentifierTable &Idents,
849 SelectorTable &SelTable,
850 const IdentifierInfo *Name);
851
852 /// Return the property name for the given setter selector.
853 static std::string getPropertyNameFromSetterSelector(Selector Sel);
854};
855
856namespace detail {
857
858/// DeclarationNameExtra is used as a base of various uncommon special names.
859/// This class is needed since DeclarationName has not enough space to store
860/// the kind of every possible names. Therefore the kind of common names is
861/// stored directly in DeclarationName, and the kind of uncommon names is
862/// stored in DeclarationNameExtra. It is aligned to 8 bytes because
863/// DeclarationName needs the lower 3 bits to store the kind of common names.
864/// DeclarationNameExtra is tightly coupled to DeclarationName and any change
865/// here is very likely to require changes in DeclarationName(Table).
866class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
867 friend class clang::DeclarationName;
868 friend class clang::DeclarationNameTable;
869
870protected:
871 /// The kind of "extra" information stored in the DeclarationName. See
872 /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
873 /// are used. Note that DeclarationName depends on the numerical values
874 /// of the enumerators in this enum. See DeclarationName::StoredNameKind
875 /// for more info.
876 enum ExtraKind {
877 CXXDeductionGuideName,
878 CXXLiteralOperatorName,
879 CXXUsingDirective,
880 ObjCMultiArgSelector
881 };
882
883 /// ExtraKindOrNumArgs has one of the following meaning:
884 /// * The kind of an uncommon C++ special name. This DeclarationNameExtra
885 /// is in this case in fact either a CXXDeductionGuideNameExtra or
886 /// a CXXLiteralOperatorIdName.
887 ///
888 /// * It may be also name common to C++ using-directives (CXXUsingDirective),
889 ///
890 /// * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
891 /// the number of arguments in the Objective-C selector, in which
892 /// case the DeclarationNameExtra is also a MultiKeywordSelector.
893 unsigned ExtraKindOrNumArgs;
894
895 DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
896 DeclarationNameExtra(unsigned NumArgs)
897 : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
898
899 /// Return the corresponding ExtraKind.
900 ExtraKind getKind() const {
901 return static_cast<ExtraKind>(ExtraKindOrNumArgs >
902 (unsigned)ObjCMultiArgSelector
903 ? (unsigned)ObjCMultiArgSelector
904 : ExtraKindOrNumArgs);
905 }
906
907 /// Return the number of arguments in an ObjC selector. Only valid when this
908 /// is indeed an ObjCMultiArgSelector.
909 unsigned getNumArgs() const {
910 assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
911 "getNumArgs called but this is not an ObjC selector!");
912 return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
913 }
914};
915
916} // namespace detail
917
918} // namespace clang
919
920namespace llvm {
921
922/// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
923/// DenseSets.
924template <>
925struct DenseMapInfo<clang::Selector> {
926 static clang::Selector getEmptyKey() {
927 return clang::Selector::getEmptyMarker();
928 }
929
930 static clang::Selector getTombstoneKey() {
931 return clang::Selector::getTombstoneMarker();
932 }
933
934 static unsigned getHashValue(clang::Selector S);
935
936 static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
937 return LHS == RHS;
938 }
939};
940
941template<>
942struct PointerLikeTypeTraits<clang::Selector> {
943 static const void *getAsVoidPointer(clang::Selector P) {
944 return P.getAsOpaquePtr();
945 }
946
947 static clang::Selector getFromVoidPointer(const void *P) {
948 return clang::Selector(reinterpret_cast<uintptr_t>(P));
949 }
950
951 enum { NumLowBitsAvailable = 0 };
952};
953
954// Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
955// are not guaranteed to be 8-byte aligned.
956template<>
957struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
958 static void *getAsVoidPointer(clang::IdentifierInfo* P) {
959 return P;
960 }
961
962 static clang::IdentifierInfo *getFromVoidPointer(void *P) {
963 return static_cast<clang::IdentifierInfo*>(P);
964 }
965
966 enum { NumLowBitsAvailable = 1 };
967};
968
969template<>
970struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
971 static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
972 return P;
973 }
974
975 static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
976 return static_cast<const clang::IdentifierInfo*>(P);
977 }
978
979 enum { NumLowBitsAvailable = 1 };
980};
981
982} // namespace llvm
983
984#endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
985