1/*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005-2010, 2013-2016 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef StringImpl_h
24#define StringImpl_h
25
26#include <limits.h>
27#include <unicode/uchar.h>
28#include <unicode/ustring.h>
29#include <wtf/ASCIICType.h>
30#include <wtf/Forward.h>
31#include <wtf/Hasher.h>
32#include <wtf/MathExtras.h>
33#include <wtf/StdLibExtras.h>
34#include <wtf/Vector.h>
35#include <wtf/text/ConversionMode.h>
36#include <wtf/text/StringCommon.h>
37
38#if PLATFORM(QT)
39#include <QString>
40#endif
41
// Forward declaration of CFStringRef for builds that use CoreFoundation.
#if USE(CF)
typedef const struct __CFString * CFStringRef;
#endif

// Forward declaration of NSString; only seen when compiling as Objective-C.
#ifdef __OBJC__
@class NSString;
#endif
49
// Forward declarations of the JavaScriptCore types that StringImpl names as friends.
namespace JSC {
namespace LLInt { class Data; }
class LLIntOffsetsExtractor;
}
54
55namespace WTF {
56
// Symbol-related types used in StringImpl's interface.
class SymbolImpl;
class SymbolRegistry;

// Translator and hash structs; most of them are declared friends of StringImpl.
struct CStringTranslator;
struct CharBufferFromLiteralDataTranslator;
struct HashAndUTF8CharactersTranslator;
struct LCharBufferTranslator;
struct StringHash;
struct SubstringTranslator;
struct UCharBufferTranslator;

// Return type of StringImpl::createCFString().
template<typename> class RetainPtr;

template<typename> struct HashAndCharactersTranslator;
71
// Case-sensitivity mode. The enumerators keep the values 0 and 1.
enum TextCaseSensitivity {
    TextCaseSensitive = 0,
    TextCaseInsensitive = 1
};
76
77typedef bool (*CharacterMatchFunctionPtr)(UChar);
78typedef bool (*IsWhiteSpaceFunctionPtr)(UChar);
79
// Define STRING_STATS to 1 to turn on run-time statistics of string sizes and memory usage.
81#define STRING_STATS 0
82
#if STRING_STATS
// Counters collected while STRING_STATS is enabled. StringImpl reaches the
// shared instance through StringImpl::stringStats() and updates it via the
// STRING_STATS_* macros defined below.
struct StringStats {
    // Counts one more 8-bit string. Its length is added to the 8-bit data
    // total unless the string is a substring.
    inline void add8BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number8BitStrings;
        if (!isSubString)
            m_total8BitData += length;
    }

    // Counts one more 16-bit string. Its length is added to the 16-bit data
    // total unless the string is a substring.
    inline void add16BitString(unsigned length, bool isSubString = false)
    {
        ++m_totalNumberStrings;
        ++m_number16BitStrings;
        if (!isSubString)
            m_total16BitData += length;
    }

    void removeString(StringImpl&);
    void printStats();

    static const unsigned s_printStringStatsFrequency = 5000;
    static std::atomic<unsigned> s_stringRemovesTillPrintStats;

    // Incremented by STRING_STATS_REF_STRING and STRING_STATS_DEREF_STRING.
    std::atomic<unsigned> m_refCalls;
    std::atomic<unsigned> m_derefCalls;

    std::atomic<unsigned> m_totalNumberStrings;
    std::atomic<unsigned> m_number8BitStrings;
    std::atomic<unsigned> m_number16BitStrings;
    std::atomic<unsigned long long> m_total8BitData;
    std::atomic<unsigned long long> m_total16BitData;
};

// Each macro expands to one expression without a trailing semicolon, so a use
// such as "if (x) STRING_STATS_REF_STRING(s); else ..." parses as intended.
#define STRING_STATS_ADD_8BIT_STRING(length) StringImpl::stringStats().add8BitString(length)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) StringImpl::stringStats().add8BitString(length, isSubString)
#define STRING_STATS_ADD_16BIT_STRING(length) StringImpl::stringStats().add16BitString(length)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) StringImpl::stringStats().add16BitString(length, isSubString)
// StringStats has no counter for up-converted strings; this is a no-op so that
// both configurations define the same set of macros.
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) StringImpl::stringStats().removeString(string)
#define STRING_STATS_REF_STRING(string) ((void)++StringImpl::stringStats().m_refCalls)
#define STRING_STATS_DEREF_STRING(string) ((void)++StringImpl::stringStats().m_derefCalls)
#else
// Statistics disabled: every hook compiles to nothing.
#define STRING_STATS_ADD_8BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_8BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING(length) ((void)0)
#define STRING_STATS_ADD_16BIT_STRING2(length, isSubString) ((void)0)
#define STRING_STATS_ADD_UPCONVERTED_STRING(length) ((void)0)
#define STRING_STATS_REMOVE_STRING(string) ((void)0)
#define STRING_STATS_REF_STRING(string) ((void)0)
#define STRING_STATS_DEREF_STRING(string) ((void)0)
#endif
134
// StringImpl is a reference-counted string object (see ref() and deref()).
// Its characters are either 8-bit LChar or 16-bit UChar, as reported by
// is8Bit(). Instances are non-copyable and use the fast allocator.
class StringImpl {
    WTF_MAKE_NONCOPYABLE(StringImpl); WTF_MAKE_FAST_ALLOCATED;
    // The translator structs below get access to the private members.
    friend struct WTF::CStringTranslator;
    template<typename CharacterType> friend struct WTF::HashAndCharactersTranslator;
    friend struct WTF::HashAndUTF8CharactersTranslator;
    friend struct WTF::CharBufferFromLiteralDataTranslator;
    friend struct WTF::LCharBufferTranslator;
    friend struct WTF::SubstringTranslator;
    friend struct WTF::UCharBufferTranslator;
    // JavaScriptCore's LLInt data and offsets extractor are friends as well.
    friend class JSC::LLInt::Data;
    friend class JSC::LLIntOffsetsExtractor;
147private:
// Where the character buffer comes from. The value is stored in the two
// lowest bits of m_hashAndFlags (see bufferOwnership()).
enum BufferOwnership {
    BufferInternal = 0,
    BufferOwned = 1,
    BufferSubstring = 2,
};
153
154 // The bottom 6 bits in the hash are flags.
155 static const unsigned s_flagCount = 6;
156 static const unsigned s_flagMask = (1u << s_flagCount) - 1;
157 COMPILE_ASSERT(s_flagCount <= StringHasher::flagCount, StringHasher_reserves_enough_bits_for_StringImpl_flags);
158 static const unsigned s_flagStringKindCount = 4;
159
160 static const unsigned s_hashFlagStringKindIsAtomic = 1u << (s_flagStringKindCount);
161 static const unsigned s_hashFlagStringKindIsSymbol = 1u << (s_flagStringKindCount + 1);
162 static const unsigned s_hashMaskStringKind = s_hashFlagStringKindIsAtomic | s_hashFlagStringKindIsSymbol;
163 static const unsigned s_hashFlag8BitBuffer = 1u << 3;
164 static const unsigned s_hashFlagDidReportCost = 1u << 2;
165 static const unsigned s_hashMaskBufferOwnership = (1u << 0) | (1u << 1);
166
167 enum StringKind {
168 StringNormal = 0u, // non-symbol, non-atomic
169 StringAtomic = s_hashFlagStringKindIsAtomic, // non-symbol, atomic
170 StringSymbol = s_hashFlagStringKindIsSymbol, // symbol, non-atomic
171 };
172
173 // Used to construct static strings, which have an special refCount that can never hit zero.
174 // This means that the static string will never be destroyed, which is important because
175 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
176 friend class NeverDestroyed<StringImpl>;
177 enum ConstructEmptyStringTag { ConstructEmptyString };
178 StringImpl(ConstructEmptyStringTag)
179 : m_refCount(s_refCountFlagIsStaticString)
180 , m_length(0)
181 , m_data8(reinterpret_cast<const LChar*>(&m_length))
182 , m_hashAndFlags(s_hashFlag8BitBuffer | StringAtomic | BufferOwned)
183 {
184 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
185 // with impunity. The empty string is special because it is never entered into
186 // AtomicString's HashKey, but still needs to compare correctly.
187 STRING_STATS_ADD_8BIT_STRING(m_length);
188
189 hash();
190 }
191
192 // FIXME: there has to be a less hacky way to do this.
193 enum Force8Bit { Force8BitConstructor };
194 // Create a normal 8-bit string with internal storage (BufferInternal)
195 StringImpl(unsigned length, Force8Bit)
196 : m_refCount(s_refCountIncrement)
197 , m_length(length)
198 , m_data8(tailPointer<LChar>())
199 , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal)
200 {
201 ASSERT(m_data8);
202 ASSERT(m_length);
203
204 STRING_STATS_ADD_8BIT_STRING(m_length);
205 }
206
207 // Create a normal 16-bit string with internal storage (BufferInternal)
208 StringImpl(unsigned length)
209 : m_refCount(s_refCountIncrement)
210 , m_length(length)
211 , m_data16(tailPointer<UChar>())
212 , m_hashAndFlags(StringNormal | BufferInternal)
213 {
214 ASSERT(m_data16);
215 ASSERT(m_length);
216
217 STRING_STATS_ADD_16BIT_STRING(m_length);
218 }
219
220 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
221 StringImpl(MallocPtr<LChar> characters, unsigned length)
222 : m_refCount(s_refCountIncrement)
223 , m_length(length)
224 , m_data8(characters.leakPtr())
225 , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferOwned)
226 {
227 ASSERT(m_data8);
228 ASSERT(m_length);
229
230 STRING_STATS_ADD_8BIT_STRING(m_length);
231 }
232
233 enum ConstructWithoutCopyingTag { ConstructWithoutCopying };
234 StringImpl(const UChar* characters, unsigned length, ConstructWithoutCopyingTag)
235 : m_refCount(s_refCountIncrement)
236 , m_length(length)
237 , m_data16(characters)
238 , m_hashAndFlags(StringNormal | BufferInternal)
239 {
240 ASSERT(m_data16);
241 ASSERT(m_length);
242
243 STRING_STATS_ADD_16BIT_STRING(m_length);
244 }
245
246 StringImpl(const LChar* characters, unsigned length, ConstructWithoutCopyingTag)
247 : m_refCount(s_refCountIncrement)
248 , m_length(length)
249 , m_data8(characters)
250 , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferInternal)
251 {
252 ASSERT(m_data8);
253 ASSERT(m_length);
254
255 STRING_STATS_ADD_8BIT_STRING(m_length);
256 }
257
258 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
259 StringImpl(MallocPtr<UChar> characters, unsigned length)
260 : m_refCount(s_refCountIncrement)
261 , m_length(length)
262 , m_data16(characters.leakPtr())
263 , m_hashAndFlags(StringNormal | BufferOwned)
264 {
265 ASSERT(m_data16);
266 ASSERT(m_length);
267
268 STRING_STATS_ADD_16BIT_STRING(m_length);
269 }
270
271 // Used to create new strings that are a substring of an existing 8-bit StringImpl (BufferSubstring)
272 StringImpl(const LChar* characters, unsigned length, PassRefPtr<StringImpl> base)
273 : m_refCount(s_refCountIncrement)
274 , m_length(length)
275 , m_data8(characters)
276 , m_hashAndFlags(s_hashFlag8BitBuffer | StringNormal | BufferSubstring)
277 {
278 ASSERT(is8Bit());
279 ASSERT(m_data8);
280 ASSERT(m_length);
281 ASSERT(base->bufferOwnership() != BufferSubstring);
282
283 substringBuffer() = base.leakRef();
284
285 STRING_STATS_ADD_8BIT_STRING2(m_length, true);
286 }
287
288 // Used to create new strings that are a substring of an existing 16-bit StringImpl (BufferSubstring)
289 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
290 : m_refCount(s_refCountIncrement)
291 , m_length(length)
292 , m_data16(characters)
293 , m_hashAndFlags(StringNormal | BufferSubstring)
294 {
295 ASSERT(!is8Bit());
296 ASSERT(m_data16);
297 ASSERT(m_length);
298 ASSERT(base->bufferOwnership() != BufferSubstring);
299
300 substringBuffer() = base.leakRef();
301
302 STRING_STATS_ADD_16BIT_STRING2(m_length, true);
303 }
304
305 enum CreateSymbolTag { CreateSymbol };
306 // Used to create new symbol strings that holds existing 8-bit [[Description]] string as a substring buffer (BufferSubstring).
307 StringImpl(CreateSymbolTag, const LChar* characters, unsigned length, PassRefPtr<StringImpl> base)
308 : m_refCount(s_refCountIncrement)
309 , m_length(length)
310 , m_data8(characters)
311 , m_hashAndFlags(s_hashFlag8BitBuffer | StringSymbol | BufferSubstring)
312 {
313 ASSERT(is8Bit());
314 ASSERT(m_data8);
315 ASSERT(base->bufferOwnership() != BufferSubstring);
316
317 substringBuffer() = base.leakRef();
318 symbolRegistry() = nullptr;
319 hashForSymbol() = nextHashForSymbol();
320
321 STRING_STATS_ADD_8BIT_STRING2(m_length, true);
322 }
323
324 // Used to create new symbol strings that holds existing 16-bit [[Description]] string as a substring buffer (BufferSubstring).
325 StringImpl(CreateSymbolTag, const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
326 : m_refCount(s_refCountIncrement)
327 , m_length(length)
328 , m_data16(characters)
329 , m_hashAndFlags(StringSymbol | BufferSubstring)
330 {
331 ASSERT(!is8Bit());
332 ASSERT(m_data16);
333 ASSERT(base->bufferOwnership() != BufferSubstring);
334
335 substringBuffer() = base.leakRef();
336 symbolRegistry() = nullptr;
337 hashForSymbol() = nextHashForSymbol();
338
339 STRING_STATS_ADD_16BIT_STRING2(m_length, true);
340 }
341
342public:
343 WTF_EXPORT_STRING_API static void destroy(StringImpl*);
344
345 WTF_EXPORT_STRING_API static Ref<StringImpl> create(const UChar*, unsigned length);
346 WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*, unsigned length);
347 WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*, unsigned length);
348 template<size_t inlineCapacity>
349 static Ref<StringImpl> create8BitIfPossible(const Vector<UChar, inlineCapacity>& vector)
350 {
351 return create8BitIfPossible(vector.data(), vector.size());
352 }
353 WTF_EXPORT_STRING_API static Ref<StringImpl> create8BitIfPossible(const UChar*);
354
355 ALWAYS_INLINE static Ref<StringImpl> create(const char* s, unsigned length) { return create(reinterpret_cast<const LChar*>(s), length); }
356 WTF_EXPORT_STRING_API static Ref<StringImpl> create(const LChar*);
357 ALWAYS_INLINE static Ref<StringImpl> create(const char* s) { return create(reinterpret_cast<const LChar*>(s)); }
358
359 static ALWAYS_INLINE Ref<StringImpl> createSubstringSharingImpl(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
360 {
361 ASSERT(rep);
362 ASSERT(length <= rep->length());
363
364 if (!length)
365 return *empty();
366
367 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->substringBuffer() : rep.get();
368
369 // We allocate a buffer that contains both the StringImpl struct as well as the pointer to the owner string.
370 StringImpl* stringImpl = static_cast<StringImpl*>(fastMalloc(allocationSize<StringImpl*>(1)));
371 if (rep->is8Bit())
372 return adoptRef(*new (NotNull, stringImpl) StringImpl(rep->m_data8 + offset, length, ownerRep));
373 return adoptRef(*new (NotNull, stringImpl) StringImpl(rep->m_data16 + offset, length, ownerRep));
374 }
375
376 template<unsigned charactersCount>
377 ALWAYS_INLINE static Ref<StringImpl> createFromLiteral(const char (&characters)[charactersCount])
378 {
379 COMPILE_ASSERT(charactersCount > 1, StringImplFromLiteralNotEmpty);
380 COMPILE_ASSERT((charactersCount - 1 <= ((unsigned(~0) - sizeof(StringImpl)) / sizeof(LChar))), StringImplFromLiteralCannotOverflow);
381
382 return createWithoutCopying(reinterpret_cast<const LChar*>(characters), charactersCount - 1);
383 }
384
// FIXME: Transition off of these functions to createWithoutCopying instead.
WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters, unsigned length);
WTF_EXPORT_STRING_API static Ref<StringImpl> createFromLiteral(const char* characters);

// Used by the array-literal createFromLiteral overload.
// NOTE(review): presumably the caller must keep the characters alive for the string's lifetime — confirm.
WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const UChar* characters, unsigned length);
WTF_EXPORT_STRING_API static Ref<StringImpl> createWithoutCopying(const LChar* characters, unsigned length);

// Create a string of the given length and hand its character storage back
// through `data`, for the caller to fill in.
WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, LChar*& data);
WTF_EXPORT_STRING_API static Ref<StringImpl> createUninitialized(unsigned length, UChar*& data);
394 template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output)
395 {
396 if (!length) {
397 output = 0;
398 return empty();
399 }
400
401 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(T))) {
402 output = 0;
403 return 0;
404 }
405 StringImpl* resultImpl;
406 if (!tryFastMalloc(allocationSize<T>(length)).getValue(resultImpl)) {
407 output = 0;
408 return 0;
409 }
410 output = resultImpl->tailPointer<T>();
411
412 return constructInternal<T>(resultImpl, length);
413 }
414
// Symbol factories; createSymbol takes the string used as the symbol's description.
WTF_EXPORT_STRING_API static Ref<SymbolImpl> createSymbolEmpty();
WTF_EXPORT_STRING_API static Ref<SymbolImpl> createSymbol(PassRefPtr<StringImpl> rep);

// Reallocate the StringImpl. The originalString must be only owned by the PassRefPtr,
// and the buffer ownership must be BufferInternal. Just like the input pointer of realloc(),
// the originalString can't be used after this function.
// `data` receives the character storage of the returned string.
static Ref<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data);
static Ref<StringImpl> reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data);
423
424 static unsigned flagsOffset() { return OBJECT_OFFSETOF(StringImpl, m_hashAndFlags); }
425 static unsigned flagIs8Bit() { return s_hashFlag8BitBuffer; }
426 static unsigned flagIsAtomic() { return s_hashFlagStringKindIsAtomic; }
427 static unsigned flagIsSymbol() { return s_hashFlagStringKindIsSymbol; }
428 static unsigned maskStringKind() { return s_hashMaskStringKind; }
429 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data8); }
430
431 template<typename CharType, size_t inlineCapacity, typename OverflowHandler>
432 static Ref<StringImpl> adopt(Vector<CharType, inlineCapacity, OverflowHandler>& vector)
433 {
434 if (size_t size = vector.size()) {
435 ASSERT(vector.data());
436 if (size > std::numeric_limits<unsigned>::max())
437 CRASH();
438 return adoptRef(*new StringImpl(vector.releaseBuffer(), size));
439 }
440 return *empty();
441 }
442
443 WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<UChar>&);
444 WTF_EXPORT_STRING_API static Ref<StringImpl> adopt(StringBuffer<LChar>&);
445
446 unsigned length() const { return m_length; }
447 static ptrdiff_t lengthMemoryOffset() { return OBJECT_OFFSETOF(StringImpl, m_length); }
448 bool is8Bit() const { return m_hashAndFlags & s_hashFlag8BitBuffer; }
449
450 ALWAYS_INLINE const LChar* characters8() const { ASSERT(is8Bit()); return m_data8; }
451 ALWAYS_INLINE const UChar* characters16() const { ASSERT(!is8Bit()); return m_data16; }
452
453 template <typename CharType>
454 ALWAYS_INLINE const CharType *characters() const;
455
456 size_t cost() const
457 {
458 // For substrings, return the cost of the base string.
459 if (bufferOwnership() == BufferSubstring)
460 return substringBuffer()->cost();
461
462 if (m_hashAndFlags & s_hashFlagDidReportCost)
463 return 0;
464
465 m_hashAndFlags |= s_hashFlagDidReportCost;
466 size_t result = m_length;
467 if (!is8Bit())
468 result <<= 1;
469 return result;
470 }
471
472 size_t costDuringGC()
473 {
474 if (isStatic())
475 return 0;
476
477 if (bufferOwnership() == BufferSubstring)
478 return divideRoundedUp(substringBuffer()->costDuringGC(), refCount());
479
480 size_t result = m_length;
481 if (!is8Bit())
482 result <<= 1;
483 return divideRoundedUp(result, refCount());
484 }
485
486 WTF_EXPORT_STRING_API size_t sizeInBytes() const;
487
488 StringKind stringKind() const { return static_cast<StringKind>(m_hashAndFlags & s_hashMaskStringKind); }
489 bool isSymbol() const { return m_hashAndFlags & s_hashFlagStringKindIsSymbol; }
490 bool isAtomic() const { return m_hashAndFlags & s_hashFlagStringKindIsAtomic; }
491
492 void setIsAtomic(bool isAtomic)
493 {
494 ASSERT(!isStatic());
495 ASSERT(!isSymbol());
496 if (isAtomic) {
497 m_hashAndFlags |= s_hashFlagStringKindIsAtomic;
498 ASSERT(stringKind() == StringAtomic);
499 } else {
500 m_hashAndFlags &= ~s_hashFlagStringKindIsAtomic;
501 ASSERT(stringKind() == StringNormal);
502 }
503 }
504
#if STRING_STATS
// Only available with STRING_STATS: true when this string shares another string's buffer.
bool isSubString() const { return bufferOwnership() == BufferSubstring; }
#endif

// UTF-8 conversion: of raw characters, of a range of this string, or of the
// whole string. The 16-bit and member versions default to LenientConversion.
static WTF_EXPORT_STRING_API CString utf8ForCharacters(const LChar* characters, unsigned length);
static WTF_EXPORT_STRING_API CString utf8ForCharacters(const UChar* characters, unsigned length, ConversionMode = LenientConversion);
WTF_EXPORT_STRING_API CString utf8ForRange(unsigned offset, unsigned length, ConversionMode = LenientConversion) const;
WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
513
514private:
// Private helper for the UTF-8 conversions; `buffer` is passed by reference.
// NOTE(review): presumably it is advanced past the bytes written and the result signals success — confirm in the implementation.
static WTF_EXPORT_STRING_API bool utf8Impl(const UChar* characters, unsigned length, char*& buffer, size_t bufferSize, ConversionMode);
516
517 // The high bits of 'hash' are always empty, but we prefer to store our flags
518 // in the low bits because it makes them slightly more efficient to access.
519 // So, we shift left and right when setting and getting our hash code.
520 void setHash(unsigned hash) const
521 {
522 ASSERT(!hasHash());
523 // Multiple clients assume that StringHasher is the canonical string hash function.
524 ASSERT(hash == (is8Bit() ? StringHasher::computeHashAndMaskTop8Bits(m_data8, m_length) : StringHasher::computeHashAndMaskTop8Bits(m_data16, m_length)));
525 ASSERT(!(hash & (s_flagMask << (8 * sizeof(hash) - s_flagCount)))); // Verify that enough high bits are empty.
526
527 hash <<= s_flagCount;
528 ASSERT(!(hash & m_hashAndFlags)); // Verify that enough low bits are empty after shift.
529 ASSERT(hash); // Verify that 0 is a valid sentinel hash value.
530
531 m_hashAndFlags |= hash; // Store hash with flags in low bits.
532 }
533
534 unsigned rawHash() const
535 {
536 return m_hashAndFlags >> s_flagCount;
537 }
538
539public:
540 bool hasHash() const
541 {
542 return rawHash() != 0;
543 }
544
545 unsigned existingHash() const
546 {
547 ASSERT(hasHash());
548 return rawHash();
549 }
550
551 unsigned hash() const
552 {
553 if (hasHash())
554 return existingHash();
555 return hashSlowCase();
556 }
557
558 unsigned symbolAwareHash() const
559 {
560 if (isSymbol())
561 return hashForSymbol();
562 return hash();
563 }
564
565 unsigned existingSymbolAwareHash() const
566 {
567 if (isSymbol())
568 return hashForSymbol();
569 return existingHash();
570 }
571
572 bool isStatic() const { return m_refCount & s_refCountFlagIsStaticString; }
573
574 inline size_t refCount() const
575 {
576 return m_refCount / s_refCountIncrement;
577 }
578
579 inline bool hasOneRef() const
580 {
581 return m_refCount == s_refCountIncrement;
582 }
583
584 // This method is useful for assertions.
585 inline bool hasAtLeastOneRef() const
586 {
587 return !!m_refCount;
588 }
589
590 inline void ref()
591 {
592 ASSERT(!isCompilationThread());
593
594 STRING_STATS_REF_STRING(*this);
595
596 m_refCount += s_refCountIncrement;
597 }
598
599 inline void deref()
600 {
601 ASSERT(!isCompilationThread());
602
603 STRING_STATS_DEREF_STRING(*this);
604
605 unsigned tempRefCount = m_refCount - s_refCountIncrement;
606 if (!tempRefCount) {
607 StringImpl::destroy(this);
608 return;
609 }
610 m_refCount = tempRefCount;
611 }
612
// The shared empty string, returned by the factories whenever the requested length is zero.
WTF_EXPORT_PRIVATE static StringImpl* empty();
614
615 // FIXME: Does this really belong in StringImpl?
616 template <typename T> static void copyChars(T* destination, const T* source, unsigned numCharacters)
617 {
618 if (numCharacters == 1) {
619 *destination = *source;
620 return;
621 }
622
623 if (numCharacters <= s_copyCharsInlineCutOff) {
624 unsigned i = 0;
625#if (CPU(X86) || CPU(X86_64))
626 const unsigned charsPerInt = sizeof(uint32_t) / sizeof(T);
627
628 if (numCharacters > charsPerInt) {
629 unsigned stopCount = numCharacters & ~(charsPerInt - 1);
630
631 const uint32_t* srcCharacters = reinterpret_cast<const uint32_t*>(source);
632 uint32_t* destCharacters = reinterpret_cast<uint32_t*>(destination);
633 for (unsigned j = 0; i < stopCount; i += charsPerInt, ++j)
634 destCharacters[j] = srcCharacters[j];
635 }
636#endif
637 for (; i < numCharacters; ++i)
638 destination[i] = source[i];
639 } else
640 memcpy(destination, source, numCharacters * sizeof(T));
641 }
642
643 ALWAYS_INLINE static void copyChars(UChar* destination, const LChar* source, unsigned numCharacters)
644 {
645 for (unsigned i = 0; i < numCharacters; ++i)
646 destination[i] = source[i];
647 }
648
// Some string features, like refcounting and the atomicity flag, are not
// thread-safe. We achieve thread safety by isolation, giving each thread
// its own copy of the string.
Ref<StringImpl> isolatedCopy() const;

// Returns the characters starting at `pos`; `len` defaults to UINT_MAX.
// NOTE(review): presumably an oversized len is clamped to the end of the string — confirm.
WTF_EXPORT_STRING_API Ref<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
655
656 UChar at(unsigned i) const
657 {
658 ASSERT_WITH_SECURITY_IMPLICATION(i < m_length);
659 if (is8Bit())
660 return m_data8[i];
661 return m_data16[i];
662 }
663 UChar operator[](unsigned i) const { return at(i); }
664 WTF_EXPORT_STRING_API UChar32 characterStartingAt(unsigned);
665
666 WTF_EXPORT_STRING_API bool containsOnlyWhitespace();
667
668 int toIntStrict(bool* ok = 0, int base = 10);
669 unsigned toUIntStrict(bool* ok = 0, int base = 10);
670 int64_t toInt64Strict(bool* ok = 0, int base = 10);
671 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
672 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
673
674 WTF_EXPORT_STRING_API int toInt(bool* ok = 0); // ignores trailing garbage
675 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
676 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
677 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
678 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
679
680 // FIXME: Like the strict functions above, these give false for "ok" when there is trailing garbage.
681 // Like the non-strict functions above, these return the value when there is trailing garbage.
682 // It would be better if these were more consistent with the above functions instead.
683 double toDouble(bool* ok = 0);
684 float toFloat(bool* ok = 0);
685
// Case conversions; each returns a string rather than modifying this one in place.
// The ASCII variants, the locale-independent variants and the locale-specific
// variants are separate functions.
WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIILowercase();
WTF_EXPORT_STRING_API Ref<StringImpl> convertToASCIIUppercase();
WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithoutLocale();
WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithoutLocale();
WTF_EXPORT_STRING_API Ref<StringImpl> convertToLowercaseWithLocale(const AtomicString& localeIdentifier);
WTF_EXPORT_STRING_API Ref<StringImpl> convertToUppercaseWithLocale(const AtomicString& localeIdentifier);

Ref<StringImpl> foldCase();

// Whitespace handling, either with the default notion of whitespace or with a
// caller-supplied predicate.
Ref<StringImpl> stripWhiteSpace();
Ref<StringImpl> stripWhiteSpace(IsWhiteSpaceFunctionPtr);
WTF_EXPORT_STRING_API Ref<StringImpl> simplifyWhiteSpace();
Ref<StringImpl> simplifyWhiteSpace(IsWhiteSpaceFunctionPtr);

// Removes the characters selected by the predicate.
Ref<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
template <typename CharType>
ALWAYS_INLINE Ref<StringImpl> removeCharacters(const CharType* characters, CharacterMatchFunctionPtr);
703
// Forward searches for a character, a predicate match, a C string or another
// StringImpl, starting at the given index (0 by default).
// NOTE(review): the "not found" result is presumably WTF's notFound — confirm in the implementation.
size_t find(LChar character, unsigned start = 0);
size_t find(char character, unsigned start = 0);
size_t find(UChar character, unsigned start = 0);
WTF_EXPORT_STRING_API size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
size_t find(const LChar*, unsigned index = 0);
// The char overloads reinterpret the bytes as LChar and forward to the LChar versions.
ALWAYS_INLINE size_t find(const char* s, unsigned index = 0) { return find(reinterpret_cast<const LChar*>(s), index); }
WTF_EXPORT_STRING_API size_t find(StringImpl*);
WTF_EXPORT_STRING_API size_t find(StringImpl*, unsigned index);
size_t findIgnoringCase(const LChar*, unsigned index = 0);
ALWAYS_INLINE size_t findIgnoringCase(const char* s, unsigned index = 0) { return findIgnoringCase(reinterpret_cast<const LChar*>(s), index); }
WTF_EXPORT_STRING_API size_t findIgnoringCase(StringImpl*, unsigned index = 0);
WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&) const;
WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl&, unsigned startOffset) const;
WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*) const;
WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringImpl*, unsigned startOffset) const;

WTF_EXPORT_STRING_API size_t findNextLineStart(unsigned index = UINT_MAX);

// Backward searches; the index defaults to UINT_MAX.
WTF_EXPORT_STRING_API size_t reverseFind(UChar, unsigned index = UINT_MAX);
WTF_EXPORT_STRING_API size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
WTF_EXPORT_STRING_API size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
725
// Prefix tests against another string, a single character or a C string.
WTF_EXPORT_STRING_API bool startsWith(const StringImpl*) const;
WTF_EXPORT_STRING_API bool startsWith(const StringImpl&) const;
WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl*) const;
WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringImpl&) const;
// The case-insensitive path checks for a case-insensitive match found at index 0
// when searching backwards from index 0.
bool startsWith(StringImpl* str, bool caseSensitive) { return caseSensitive ? startsWith(str) : (reverseFindIgnoringCase(str, 0) == 0); }
WTF_EXPORT_STRING_API bool startsWith(UChar) const;
WTF_EXPORT_STRING_API bool startsWith(const char*, unsigned matchLength, bool caseSensitive) const;
// Array overload: the match length is the array size minus its last element
// (the terminating null of a literal).
template<unsigned matchLength>
bool startsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return startsWith(prefix, matchLength - 1, caseSensitive); }
WTF_EXPORT_STRING_API bool hasInfixStartingAt(const StringImpl&, unsigned startOffset) const;
736
737 WTF_EXPORT_STRING_API bool endsWith(StringImpl*);
738 WTF_EXPORT_STRING_API bool endsWith(StringImpl&);
739 WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl*) const;
740 WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringImpl&) const;
741 WTF_EXPORT_STRING_API bool endsWith(StringImpl*, bool caseSensitive);
742 WTF_EXPORT_STRING_API bool endsWith(UChar) const;
743 WTF_EXPORT_STRING_API bool endsWith(const char*, unsigned matchLength, bool caseSensitive) const;
744 template<unsigned matchLength>
745 bool endsWith(const char (&prefix)[matchLength], bool caseSensitive = true) const { return endsWith(prefix, matchLength - 1, caseSensitive); }
746 WTF_EXPORT_STRING_API bool hasInfixEndingAt(const StringImpl&, unsigned endOffset) const;
747
// Replacement functions; each returns a string rather than modifying this one in place.
// The pattern may be a character, a string or an index/length range.
WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, UChar);
WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, StringImpl*);
// The char overload reinterprets the replacement bytes as LChar and forwards to the LChar version.
ALWAYS_INLINE Ref<StringImpl> replace(UChar pattern, const char* replacement, unsigned replacementLength) { return replace(pattern, reinterpret_cast<const LChar*>(replacement), replacementLength); }
WTF_EXPORT_STRING_API Ref<StringImpl> replace(UChar, const LChar*, unsigned replacementLength);
Ref<StringImpl> replace(UChar, const UChar*, unsigned replacementLength);
WTF_EXPORT_STRING_API Ref<StringImpl> replace(StringImpl*, StringImpl*);
WTF_EXPORT_STRING_API Ref<StringImpl> replace(unsigned index, unsigned len, StringImpl*);

// The optional out-parameter may be null (the default).
WTF_EXPORT_STRING_API UCharDirection defaultWritingDirection(bool* hasStrongDirectionality = nullptr);

// Platform string conversions, compiled only where the platform types exist.
#if USE(CF)
RetainPtr<CFStringRef> createCFString();
#endif
#ifdef __OBJC__
WTF_EXPORT_STRING_API operator NSString *();
#endif

#if STRING_STATS
// Access to the statistics object that the STRING_STATS_* macros update.
ALWAYS_INLINE static StringStats& stringStats() { return m_stringStats; }
#endif
768
769 Ref<StringImpl> extractFoldedStringInSymbol()
770 {
771 ASSERT(isSymbol());
772 ASSERT(bufferOwnership() == BufferSubstring);
773 ASSERT(substringBuffer());
774 ASSERT(!substringBuffer()->isSymbol());
775 return createSubstringSharingImpl(this, 0, length());
776 }
777
778 SymbolRegistry* const& symbolRegistry() const
779 {
780 ASSERT(isSymbol());
781 return *(tailPointer<SymbolRegistry*>() + 1);
782 }
783
784 SymbolRegistry*& symbolRegistry()
785 {
786 ASSERT(isSymbol());
787 return *(tailPointer<SymbolRegistry*>() + 1);
788 }
789
790 const unsigned& hashForSymbol() const
791 {
792 return const_cast<StringImpl*>(this)->hashForSymbol();
793 }
794
795 unsigned& hashForSymbol()
796 {
797 ASSERT(isSymbol());
798 return *reinterpret_cast<unsigned*>((tailPointer<SymbolRegistry*>() + 2));
799 }
800
801protected:
// The destructor is not public; deref() hands the last reference to destroy().
~StringImpl();
803
804private:
805 bool requiresCopy() const
806 {
807 if (bufferOwnership() != BufferInternal)
808 return true;
809
810 if (is8Bit())
811 return m_data8 == tailPointer<LChar>();
812 return m_data16 == tailPointer<UChar>();
813 }
814
815 template<typename T>
816 static size_t allocationSize(unsigned tailElementCount)
817 {
818 return tailOffset<T>() + tailElementCount * sizeof(T);
819 }
820
821 template<typename T>
822 static ptrdiff_t tailOffset()
823 {
824#if COMPILER(MSVC)
825 // MSVC doesn't support alignof yet.
826 return roundUpToMultipleOf<sizeof(T)>(sizeof(StringImpl));
827#else
828 return roundUpToMultipleOf<alignof(T)>(offsetof(StringImpl, m_hashAndFlags) + sizeof(StringImpl::m_hashAndFlags));
829#endif
830 }
831
832 template<typename T>
833 const T* tailPointer() const
834 {
835 return reinterpret_cast_ptr<const T*>(reinterpret_cast<const uint8_t*>(this) + tailOffset<T>());
836 }
837
838 template<typename T>
839 T* tailPointer()
840 {
841 return reinterpret_cast_ptr<T*>(reinterpret_cast<uint8_t*>(this) + tailOffset<T>());
842 }
843
844 StringImpl* const& substringBuffer() const
845 {
846 ASSERT(bufferOwnership() == BufferSubstring);
847
848 return *tailPointer<StringImpl*>();
849 }
850
851 StringImpl*& substringBuffer()
852 {
853 ASSERT(bufferOwnership() == BufferSubstring);
854
855 return *tailPointer<StringImpl*>();
856 }
857
// This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
static const unsigned s_copyCharsInlineCutOff = 20;

// Selects the direction of an ASCII case conversion at compile time.
enum class CaseConvertType { Upper, Lower };
template<CaseConvertType type, typename CharacterType> static Ref<StringImpl> convertASCIICase(StringImpl&, const CharacterType*, unsigned);

// Extracts the buffer-ownership field from m_hashAndFlags by masking with
// s_hashMaskBufferOwnership.
BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_hashAndFlags & s_hashMaskBufferOwnership); }

// Private helper templates; declared here, defined elsewhere.
template <class UCharPredicate> Ref<StringImpl> stripMatchedCharacters(UCharPredicate);
template <typename CharType, class UCharPredicate> Ref<StringImpl> simplifyMatchedCharactersToSpace(UCharPredicate);
// Specialized for LChar and UChar after the class definition.
template <typename CharType> static Ref<StringImpl> constructInternal(StringImpl*, unsigned);
template <typename CharType> static Ref<StringImpl> createUninitializedInternal(unsigned, CharType*&);
template <typename CharType> static Ref<StringImpl> createUninitializedInternalNonEmpty(unsigned, CharType*&);
template <typename CharType> static Ref<StringImpl> reallocateInternal(PassRefPtr<StringImpl>, unsigned, CharType*&);
template <typename CharType> static Ref<StringImpl> createInternal(const CharType*, unsigned);
// Out-of-line and explicitly never inlined.
WTF_EXPORT_PRIVATE NEVER_INLINE unsigned hashSlowCase() const;
WTF_EXPORT_PRIVATE static unsigned nextHashForSymbol();

// The bottom bit in the ref count indicates a static (immortal) string.
static const unsigned s_refCountFlagIsStaticString = 0x1;
static const unsigned s_refCountIncrement = 0x2; // This allows us to ref / deref without disturbing the static string flag.

#if STRING_STATS
// Class-wide statistics object returned by stringStats().
WTF_EXPORTDATA static StringStats m_stringStats;
#endif
882
883public:
// Plain struct whose data members mirror StringImpl's, in the same order.
// A static_assert after the class checks that both types have the same size.
struct StaticASCIILiteral {
    // These member variables must match the layout of StringImpl.
    unsigned m_refCount;
    unsigned m_length;
    const LChar* m_data8; // Only the 8-bit pointer; StringImpl has a union here.
    unsigned m_hashAndFlags;

    // These values mimic ConstructFromLiteral.
    static const unsigned s_initialRefCount = s_refCountIncrement;
    static const unsigned s_initialFlags = s_hashFlag8BitBuffer | StringNormal | BufferInternal;
    // The hash is stored above the flag bits, so it is shifted by the flag count.
    static const unsigned s_hashShift = s_flagCount;
};
896
897#ifndef NDEBUG
// Debug-only check that a hash is present and equals the hash recomputed
// from the 8-bit characters. Note that only characters8() is consulted.
void assertHashIsCorrect()
{
    ASSERT(hasHash());
    ASSERT(existingHash() == StringHasher::computeHashAndMaskTop8Bits(characters8(), length()));
}
903#endif
904
905private:
// These member variables must match the layout of StaticASCIILiteral.
// Bottom bit flags a static string; real references count in steps of
// s_refCountIncrement.
unsigned m_refCount;
// Number of characters in the string.
unsigned m_length;
// Character pointer; which member is valid depends on is8Bit().
union {
    const LChar* m_data8;
    const UChar* m_data16;
};
// Hash plus flag bits (buffer ownership is read from here). Declared mutable
// so it can be written through a const StringImpl.
mutable unsigned m_hashAndFlags;
914};
915
916static_assert(sizeof(StringImpl) == sizeof(StringImpl::StaticASCIILiteral), "");
917
918#if !ASSERT_DISABLED
// StringImpls created from StaticASCIILiteral will ASSERT
// in the generic ValueCheck<T>::checkConsistency
// as they are not allocated by fastMalloc.
// We don't currently have any way to detect that case
// so we ignore the consistency check for all StringImpl*.
// The specialization below therefore does nothing.
template<> struct
ValueCheck<StringImpl*> {
    static void checkConsistency(const StringImpl*) { }
};
928#endif
929
930template <>
931ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<LChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length, Force8BitConstructor)); }
932template <>
933ALWAYS_INLINE Ref<StringImpl> StringImpl::constructInternal<UChar>(StringImpl* impl, unsigned length) { return adoptRef(*new (NotNull, impl) StringImpl(length)); }
934
// LChar specialization of characters<T>(): forwards to characters8().
template <>
ALWAYS_INLINE const LChar* StringImpl::characters<LChar>() const { return characters8(); }
937
// UChar specialization of characters<T>(): forwards to characters16().
template <>
ALWAYS_INLINE const UChar* StringImpl::characters<UChar>() const { return characters16(); }
940
// Equality overloads for StringImpl. The exported overloads are defined out
// of line; the inline ones only adapt argument types or order and forward.
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const StringImpl*);
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*);
// char* convenience: reinterprets the bytes as LChar.
inline bool equal(const StringImpl* a, const char* b) { return equal(a, reinterpret_cast<const LChar*>(b)); }
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const LChar*, unsigned);
WTF_EXPORT_STRING_API bool equal(const StringImpl*, const UChar*, unsigned);
// char* convenience with an explicit length: reinterprets the bytes as LChar.
inline bool equal(const StringImpl* a, const char* b, unsigned length) { return equal(a, reinterpret_cast<const LChar*>(b), length); }
// Argument-swapped conveniences so the raw characters may come first.
inline bool equal(const LChar* a, StringImpl* b) { return equal(b, a); }
inline bool equal(const char* a, StringImpl* b) { return equal(b, reinterpret_cast<const LChar*>(a)); }
WTF_EXPORT_STRING_API bool equal(const StringImpl& a, const StringImpl& b);
950
// Declared here, defined out of line.
WTF_EXPORT_STRING_API bool equalIgnoringNullity(StringImpl*, StringImpl*);
WTF_EXPORT_STRING_API bool equalIgnoringNullity(const UChar*, size_t length, StringImpl*);

// ASCII-case-insensitive comparisons. The two reference overloads and the
// (pointer, char*) overload have inline definitions later in this header.
bool equalIgnoringASCIICase(const StringImpl&, const StringImpl&);
WTF_EXPORT_STRING_API bool equalIgnoringASCIICase(const StringImpl*, const StringImpl*);
WTF_EXPORT_STRING_API bool equalIgnoringASCIICase(const StringImpl&, const char*);
WTF_EXPORT_STRING_API bool equalIgnoringASCIICase(const StringImpl*, const char*);

// NOTE(review): the name suggests both arguments must be non-null — confirm at the definition.
WTF_EXPORT_STRING_API bool equalIgnoringASCIICaseNonNull(const StringImpl*, const StringImpl*);

// Compare against a char array literal whose length is deduced at compile
// time. The parameter name indicates the literal is expected to be lowercase.
template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl&, const char (&lowercaseLetters)[length]);
template<unsigned length> bool equalLettersIgnoringASCIICase(const StringImpl*, const char (&lowercaseLetters)[length]);
963
964inline size_t find(const LChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
965{
966 while (index < length) {
967 if (matchFunction(characters[index]))
968 return index;
969 ++index;
970 }
971 return notFound;
972}
973
974inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
975{
976 while (index < length) {
977 if (matchFunction(characters[index]))
978 return index;
979 ++index;
980 }
981 return notFound;
982}
983
984template<typename CharacterType>
985inline size_t findNextLineStart(const CharacterType* characters, unsigned length, unsigned index = 0)
986{
987 while (index < length) {
988 CharacterType c = characters[index++];
989 if ((c != '\n') && (c != '\r'))
990 continue;
991
992 // There can only be a start of a new line if there are more characters
993 // beyond the current character.
994 if (index < length) {
995 // The 3 common types of line terminators are 1. \r\n (Windows),
996 // 2. \r (old MacOS) and 3. \n (Unix'es).
997
998 if (c == '\n')
999 return index; // Case 3: just \n.
1000
1001 CharacterType c2 = characters[index];
1002 if (c2 != '\n')
1003 return index; // Case 2: just \r.
1004
1005 // Case 1: \r\n.
1006 // But, there's only a start of a new line if there are more
1007 // characters beyond the \r\n.
1008 if (++index < length)
1009 return index;
1010 }
1011 }
1012 return notFound;
1013}
1014
1015template<typename CharacterType>
1016inline size_t reverseFindLineTerminator(const CharacterType* characters, unsigned length, unsigned index = UINT_MAX)
1017{
1018 if (!length)
1019 return notFound;
1020 if (index >= length)
1021 index = length - 1;
1022 CharacterType c = characters[index];
1023 while ((c != '\n') && (c != '\r')) {
1024 if (!index--)
1025 return notFound;
1026 c = characters[index];
1027 }
1028 return index;
1029}
1030
1031template<typename CharacterType>
1032inline size_t reverseFind(const CharacterType* characters, unsigned length, CharacterType matchCharacter, unsigned index = UINT_MAX)
1033{
1034 if (!length)
1035 return notFound;
1036 if (index >= length)
1037 index = length - 1;
1038 while (characters[index] != matchCharacter) {
1039 if (!index--)
1040 return notFound;
1041 }
1042 return index;
1043}
1044
1045ALWAYS_INLINE size_t reverseFind(const UChar* characters, unsigned length, LChar matchCharacter, unsigned index = UINT_MAX)
1046{
1047 return reverseFind(characters, length, static_cast<UChar>(matchCharacter), index);
1048}
1049
1050inline size_t reverseFind(const LChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
1051{
1052 if (matchCharacter & ~0xFF)
1053 return notFound;
1054 return reverseFind(characters, length, static_cast<LChar>(matchCharacter), index);
1055}
1056
1057inline size_t StringImpl::find(LChar character, unsigned start)
1058{
1059 if (is8Bit())
1060 return WTF::find(characters8(), m_length, character, start);
1061 return WTF::find(characters16(), m_length, character, start);
1062}
1063
1064ALWAYS_INLINE size_t StringImpl::find(char character, unsigned start)
1065{
1066 return find(static_cast<LChar>(character), start);
1067}
1068
1069inline size_t StringImpl::find(UChar character, unsigned start)
1070{
1071 if (is8Bit())
1072 return WTF::find(characters8(), m_length, character, start);
1073 return WTF::find(characters16(), m_length, character, start);
1074}
1075
1076template<size_t inlineCapacity> inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
1077{
1078 return equalIgnoringNullity(a.data(), a.size(), b);
1079}
1080
// Three-way comparison of two character sequences, element by element.
// Returns -1, 0 or 1. The first differing element decides the result; if one
// sequence is a prefix of the other, the shorter one orders first.
template<typename CharacterType1, typename CharacterType2>
inline int codePointCompare(unsigned l1, unsigned l2, const CharacterType1* c1, const CharacterType2* c2)
{
    const unsigned commonLength = l1 < l2 ? l1 : l2;
    for (unsigned i = 0; i < commonLength; ++i) {
        if (c1[i] != c2[i])
            return (c1[i] > c2[i]) ? 1 : -1;
    }

    // Every shared position matched, so the lengths decide.
    if (l1 == l2)
        return 0;
    return (l1 > l2) ? 1 : -1;
}
1100
// Compares two strings through their 8-bit character pointers. It calls
// characters8() on both without checking the width, so callers are expected
// to pass 8-bit strings.
inline int codePointCompare8(const StringImpl* string1, const StringImpl* string2)
{
    return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters8());
}
1105
// Compares two strings through their 16-bit character pointers. It calls
// characters16() on both without checking the width, so callers are expected
// to pass 16-bit strings.
inline int codePointCompare16(const StringImpl* string1, const StringImpl* string2)
{
    return codePointCompare(string1->length(), string2->length(), string1->characters16(), string2->characters16());
}
1110
// Compares string1 through characters8() against string2 through
// characters16(). No width check is done here, so callers are expected to
// pass an 8-bit string1 and a 16-bit string2.
inline int codePointCompare8To16(const StringImpl* string1, const StringImpl* string2)
{
    return codePointCompare(string1->length(), string2->length(), string1->characters8(), string2->characters16());
}
1115
1116inline int codePointCompare(const StringImpl* string1, const StringImpl* string2)
1117{
1118 if (!string1)
1119 return (string2 && string2->length()) ? -1 : 0;
1120
1121 if (!string2)
1122 return string1->length() ? 1 : 0;
1123
1124 bool string1Is8Bit = string1->is8Bit();
1125 bool string2Is8Bit = string2->is8Bit();
1126 if (string1Is8Bit) {
1127 if (string2Is8Bit)
1128 return codePointCompare8(string1, string2);
1129 return codePointCompare8To16(string1, string2);
1130 }
1131 if (string2Is8Bit)
1132 return -codePointCompare8To16(string2, string1);
1133 return codePointCompare16(string1, string2);
1134}
1135
1136inline bool isSpaceOrNewline(UChar c)
1137{
1138 // Use isASCIISpace() for basic Latin-1.
1139 // This will include newlines, which aren't included in Unicode DirWS.
1140 return c <= 0x7F ? isASCIISpace(c) : u_charDirection(c) == U_WHITE_SPACE_NEUTRAL;
1141}
1142
1143template<typename CharacterType>
1144inline unsigned lengthOfNullTerminatedString(const CharacterType* string)
1145{
1146 ASSERT(string);
1147 size_t length = 0;
1148 while (string[length])
1149 ++length;
1150
1151 RELEASE_ASSERT(length < std::numeric_limits<unsigned>::max());
1152 return static_cast<unsigned>(length);
1153}
1154
1155inline Ref<StringImpl> StringImpl::isolatedCopy() const
1156{
1157 if (!requiresCopy()) {
1158 if (is8Bit())
1159 return StringImpl::createWithoutCopying(m_data8, m_length);
1160 return StringImpl::createWithoutCopying(m_data16, m_length);
1161 }
1162
1163 if (is8Bit())
1164 return create(m_data8, m_length);
1165 return create(m_data16, m_length);
1166}
1167
// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
// DefaultHash is forward-declared so it can be specialized in this header.
template<typename T> struct DefaultHash;
// Raw-pointer keys hash with StringHash.
template<> struct DefaultHash<StringImpl*> {
    typedef StringHash Hash;
};
// RefPtr keys hash with StringHash as well.
template<> struct DefaultHash<RefPtr<StringImpl>> {
    typedef StringHash Hash;
};
1176
// Reference overload: forwards both strings to equalIgnoringASCIICaseCommon().
inline bool equalIgnoringASCIICase(const StringImpl& a, const StringImpl& b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1181
// StringImpl-vs-C-string overload: forwards to equalIgnoringASCIICaseCommon().
inline bool equalIgnoringASCIICase(const StringImpl& a, const char* b)
{
    return equalIgnoringASCIICaseCommon(a, b);
}
1186
1187inline bool equalIgnoringASCIICase(const StringImpl* a, const char* b)
1188{
1189 return a && equalIgnoringASCIICase(*a, b);
1190}
1191
// Reference overload: forwards the string and the char array literal to
// equalLettersIgnoringASCIICaseCommon().
template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl& string, const char (&lowercaseLetters)[length])
{
    return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
}
1196
1197template<unsigned length> inline bool equalLettersIgnoringASCIICase(const StringImpl* string, const char (&lowercaseLetters)[length])
1198{
1199 return string && equalLettersIgnoringASCIICase(*string, lowercaseLetters);
1200}
1201
1202} // namespace WTF
1203
1204using WTF::StringImpl;
1205using WTF::equal;
1206using WTF::TextCaseSensitivity;
1207using WTF::TextCaseSensitive;
1208using WTF::TextCaseInsensitive;
1209
1210#endif
1211