1/*
2 * Copyright (C) 2014-2015 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef StringView_h
27#define StringView_h
28
29#include <unicode/utypes.h>
30#include <wtf/Forward.h>
31#include <wtf/RetainPtr.h>
32#include <wtf/Vector.h>
33#include <wtf/text/CString.h>
34#include <wtf/text/ConversionMode.h>
35#include <wtf/text/LChar.h>
36#include <wtf/text/StringCommon.h>
37
38// FIXME: Enabling the StringView lifetime checking causes the MSVC build to fail. Figure out why.
39// FIXME: Enable StringView lifetime checking once the underlying assertions have been fixed.
40#if defined(NDEBUG) || COMPILER(MSVC) || 1
41#define CHECK_STRINGVIEW_LIFETIME 0
42#else
43#define CHECK_STRINGVIEW_LIFETIME 1
44#endif
45
46namespace WTF {
47
48// StringView is a non-owning reference to a string, similar to the proposed std::string_view.
49// Whether the string is 8-bit or 16-bit is encoded in the upper bit of the length member.
50// This means that strings longer than 2 gigacharacters cannot be represented.
51
52class StringView {
53public:
54 StringView();
55 ~StringView();
56 StringView(StringView&&);
57 StringView(const StringView&);
58 StringView& operator=(StringView&&);
59 StringView& operator=(const StringView&);
60
61 StringView(const String&);
62 StringView(const StringImpl&);
63 StringView(const StringImpl*);
64 StringView(const LChar*, unsigned length);
65 StringView(const UChar*, unsigned length);
66
67 static StringView empty();
68
69 unsigned length() const;
70 bool isEmpty() const;
71
72 explicit operator bool() const;
73 bool isNull() const;
74
75 UChar operator[](unsigned index) const;
76
77 class CodeUnits;
78 CodeUnits codeUnits() const;
79
80 class CodePoints;
81 CodePoints codePoints() const;
82
83 bool is8Bit() const;
84 const LChar* characters8() const;
85 const UChar* characters16() const;
86
87 String toString() const;
88 String toStringWithoutCopying() const;
89
90#if USE(CF)
91 // This function converts null strings to empty strings.
92 WTF_EXPORT_STRING_API RetainPtr<CFStringRef> createCFStringWithoutCopying() const;
93#endif
94
95#ifdef __OBJC__
96 // These functions convert null strings to empty strings.
97 WTF_EXPORT_STRING_API RetainPtr<NSString> createNSString() const;
98 WTF_EXPORT_STRING_API RetainPtr<NSString> createNSStringWithoutCopying() const;
99#endif
100
101 WTF_EXPORT_STRING_API CString utf8(ConversionMode = LenientConversion) const;
102
103 class UpconvertedCharacters;
104 UpconvertedCharacters upconvertedCharacters() const;
105
106 void getCharactersWithUpconvert(LChar*) const;
107 void getCharactersWithUpconvert(UChar*) const;
108
109 StringView substring(unsigned start, unsigned length = std::numeric_limits<unsigned>::max()) const;
110
111 size_t find(UChar, unsigned start = 0) const;
112
113 WTF_EXPORT_STRING_API size_t find(StringView, unsigned start) const;
114
115 WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&) const;
116 WTF_EXPORT_STRING_API size_t findIgnoringASCIICase(const StringView&, unsigned startOffset) const;
117
118 bool contains(UChar) const;
119 WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&) const;
120 WTF_EXPORT_STRING_API bool containsIgnoringASCIICase(const StringView&, unsigned startOffset) const;
121
122 WTF_EXPORT_STRING_API bool startsWith(const StringView&) const;
123 WTF_EXPORT_STRING_API bool startsWithIgnoringASCIICase(const StringView&) const;
124
125 WTF_EXPORT_STRING_API bool endsWith(const StringView&) const;
126 WTF_EXPORT_STRING_API bool endsWithIgnoringASCIICase(const StringView&) const;
127
128 int toInt() const;
129 int toInt(bool& isValid) const;
130 int toIntStrict(bool& isValid) const;
131 float toFloat(bool& isValid) const;
132
133 static void invalidate(const StringImpl&);
134
135 struct UnderlyingString;
136
137private:
138 void initialize(const LChar*, unsigned length);
139 void initialize(const UChar*, unsigned length);
140
141#if CHECK_STRINGVIEW_LIFETIME
142 WTF_EXPORT_STRING_API bool underlyingStringIsValid() const;
143 WTF_EXPORT_STRING_API void setUnderlyingString(const StringImpl*);
144 WTF_EXPORT_STRING_API void setUnderlyingString(const StringView&);
145#else
146 bool underlyingStringIsValid() const { return true; }
147 void setUnderlyingString(const StringImpl*) { }
148 void setUnderlyingString(const StringView&) { }
149#endif
150
151 static const unsigned is16BitStringFlag = 1u << 31;
152
153 const void* m_characters { nullptr };
154 unsigned m_length { 0 };
155
156#if CHECK_STRINGVIEW_LIFETIME
157 void adoptUnderlyingString(UnderlyingString*);
158 UnderlyingString* m_underlyingString { nullptr };
159#endif
160};
161
162template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>&, StringView);
163
164bool equal(StringView, StringView);
165bool equal(StringView, const LChar*);
166bool equal(StringView, const char*);
167
168bool equalIgnoringASCIICase(StringView, StringView);
169bool equalIgnoringASCIICase(StringView, const char*);
170
171template<unsigned length> bool equalLettersIgnoringASCIICase(StringView, const char (&lowercaseLetters)[length]);
172
173inline bool operator==(StringView a, StringView b) { return equal(a, b); }
174inline bool operator==(StringView a, const LChar* b) { return equal(a, b); }
175inline bool operator==(StringView a, const char* b) { return equal(a, b); }
176inline bool operator==(const LChar* a, StringView b) { return equal(b, a); }
177inline bool operator==(const char* a, StringView b) { return equal(b, a); }
178
179inline bool operator!=(StringView a, StringView b) { return !equal(a, b); }
180inline bool operator!=(StringView a, const LChar* b) { return !equal(a, b); }
181inline bool operator!=(StringView a, const char* b) { return !equal(a, b); }
182inline bool operator!=(const LChar* a, StringView b) { return !equal(b, a); }
183inline bool operator!=(const char* a, StringView b) { return !equal(b, a); }
184
185}
186
187#include <wtf/text/WTFString.h>
188
189namespace WTF {
190
191inline StringView::StringView()
192{
193 // FIXME: It's peculiar that null strings are 16-bit and empty strings return 8-bit (according to the is8Bit function).
194}
195
196inline StringView::~StringView()
197{
198 setUnderlyingString(nullptr);
199}
200
201inline StringView::StringView(StringView&& other)
202 : m_characters(other.m_characters)
203 , m_length(other.m_length)
204{
205 ASSERT(other.underlyingStringIsValid());
206
207 other.m_characters = nullptr;
208 other.m_length = 0;
209
210 setUnderlyingString(other);
211 other.setUnderlyingString(nullptr);
212}
213
214inline StringView::StringView(const StringView& other)
215 : m_characters(other.m_characters)
216 , m_length(other.m_length)
217{
218 ASSERT(other.underlyingStringIsValid());
219
220 setUnderlyingString(other);
221}
222
223inline StringView& StringView::operator=(StringView&& other)
224{
225 ASSERT(other.underlyingStringIsValid());
226
227 m_characters = other.m_characters;
228 m_length = other.m_length;
229
230 other.m_characters = nullptr;
231 other.m_length = 0;
232
233 setUnderlyingString(other);
234 other.setUnderlyingString(nullptr);
235
236 return *this;
237}
238
239inline StringView& StringView::operator=(const StringView& other)
240{
241 ASSERT(other.underlyingStringIsValid());
242
243 m_characters = other.m_characters;
244 m_length = other.m_length;
245
246 setUnderlyingString(other);
247
248 return *this;
249}
250
251inline void StringView::initialize(const LChar* characters, unsigned length)
252{
253 // FIXME: We need a better solution here, because there is no guarantee that
254 // the length here won't be too long. Maybe at least a RELEASE_ASSERT?
255 ASSERT(!(length & is16BitStringFlag));
256 m_characters = characters;
257 m_length = length;
258}
259
260inline void StringView::initialize(const UChar* characters, unsigned length)
261{
262 // FIXME: We need a better solution here, because there is no guarantee that
263 // the length here won't be too long. Maybe at least a RELEASE_ASSERT?
264 ASSERT(!(length & is16BitStringFlag));
265 m_characters = characters;
266 m_length = is16BitStringFlag | length;
267}
268
269inline StringView::StringView(const LChar* characters, unsigned length)
270{
271 initialize(characters, length);
272}
273
274inline StringView::StringView(const UChar* characters, unsigned length)
275{
276 initialize(characters, length);
277}
278
279inline StringView::StringView(const StringImpl& string)
280{
281 setUnderlyingString(&string);
282 if (string.is8Bit())
283 initialize(string.characters8(), string.length());
284 else
285 initialize(string.characters16(), string.length());
286}
287
288inline StringView::StringView(const StringImpl* string)
289{
290 if (!string)
291 return;
292
293 setUnderlyingString(string);
294 if (string->is8Bit())
295 initialize(string->characters8(), string->length());
296 else
297 initialize(string->characters16(), string->length());
298}
299
300inline StringView::StringView(const String& string)
301{
302 setUnderlyingString(string.impl());
303 if (!string.impl()) {
304 m_characters = nullptr;
305 m_length = 0;
306 return;
307 }
308 if (string.is8Bit()) {
309 initialize(string.characters8(), string.length());
310 return;
311 }
312 initialize(string.characters16(), string.length());
313}
314
315inline StringView StringView::empty()
316{
317 return StringView(reinterpret_cast<const LChar*>(""), 0);
318}
319
320inline const LChar* StringView::characters8() const
321{
322 ASSERT(is8Bit());
323 ASSERT(underlyingStringIsValid());
324 return static_cast<const LChar*>(m_characters);
325}
326
327inline const UChar* StringView::characters16() const
328{
329 ASSERT(!is8Bit());
330 ASSERT(underlyingStringIsValid());
331 return static_cast<const UChar*>(m_characters);
332}
333
334class StringView::UpconvertedCharacters {
335public:
336 explicit UpconvertedCharacters(const StringView&);
337 operator const UChar*() const { return m_characters; }
338 const UChar* get() const { return m_characters; }
339private:
340 Vector<UChar, 32> m_upconvertedCharacters;
341 const UChar* m_characters;
342};
343
344inline StringView::UpconvertedCharacters StringView::upconvertedCharacters() const
345{
346 return UpconvertedCharacters(*this);
347}
348
349inline bool StringView::isNull() const
350{
351 return !m_characters;
352}
353
354inline bool StringView::isEmpty() const
355{
356 return !length();
357}
358
359inline unsigned StringView::length() const
360{
361 return m_length & ~is16BitStringFlag;
362}
363
364inline StringView::operator bool() const
365{
366 return !isNull();
367}
368
369inline bool StringView::is8Bit() const
370{
371 return !(m_length & is16BitStringFlag);
372}
373
374inline StringView StringView::substring(unsigned start, unsigned length) const
375{
376 if (start >= this->length())
377 return empty();
378 unsigned maxLength = this->length() - start;
379
380 if (length >= maxLength) {
381 if (!start)
382 return *this;
383 length = maxLength;
384 }
385
386 if (is8Bit()) {
387 StringView result(characters8() + start, length);
388 result.setUnderlyingString(*this);
389 return result;
390 }
391 StringView result(characters16() + start, length);
392 result.setUnderlyingString(*this);
393 return result;
394}
395
396inline UChar StringView::operator[](unsigned index) const
397{
398 ASSERT(index < length());
399 if (is8Bit())
400 return characters8()[index];
401 return characters16()[index];
402}
403
404inline bool StringView::contains(UChar character) const
405{
406 return find(character) != notFound;
407}
408
409inline void StringView::getCharactersWithUpconvert(LChar* destination) const
410{
411 ASSERT(is8Bit());
412 auto characters8 = this->characters8();
413 for (unsigned i = 0; i < m_length; ++i)
414 destination[i] = characters8[i];
415}
416
417inline void StringView::getCharactersWithUpconvert(UChar* destination) const
418{
419 if (is8Bit()) {
420 auto characters8 = this->characters8();
421 for (unsigned i = 0; i < m_length; ++i)
422 destination[i] = characters8[i];
423 return;
424 }
425 auto characters16 = this->characters16();
426 unsigned length = this->length();
427 for (unsigned i = 0; i < length; ++i)
428 destination[i] = characters16[i];
429}
430
431inline StringView::UpconvertedCharacters::UpconvertedCharacters(const StringView& string)
432{
433 if (!string.is8Bit()) {
434 m_characters = string.characters16();
435 return;
436 }
437 const LChar* characters8 = string.characters8();
438 unsigned length = string.m_length;
439 m_upconvertedCharacters.reserveInitialCapacity(length);
440 for (unsigned i = 0; i < length; ++i)
441 m_upconvertedCharacters.uncheckedAppend(characters8[i]);
442 m_characters = m_upconvertedCharacters.data();
443}
444
445inline String StringView::toString() const
446{
447 if (is8Bit())
448 return String(characters8(), m_length);
449 return String(characters16(), length());
450}
451
452inline float StringView::toFloat(bool& isValid) const
453{
454 if (is8Bit())
455 return charactersToFloat(characters8(), m_length, &isValid);
456 return charactersToFloat(characters16(), length(), &isValid);
457}
458
459inline int StringView::toInt() const
460{
461 bool isValid;
462 return toInt(isValid);
463}
464
465inline int StringView::toInt(bool& isValid) const
466{
467 if (is8Bit())
468 return charactersToInt(characters8(), m_length, &isValid);
469 return charactersToInt(characters16(), length(), &isValid);
470}
471
472inline int StringView::toIntStrict(bool& isValid) const
473{
474 if (is8Bit())
475 return charactersToIntStrict(characters8(), m_length, &isValid);
476 return charactersToIntStrict(characters16(), length(), &isValid);
477}
478
479inline String StringView::toStringWithoutCopying() const
480{
481 if (is8Bit())
482 return StringImpl::createWithoutCopying(characters8(), m_length);
483 return StringImpl::createWithoutCopying(characters16(), length());
484}
485
486inline size_t StringView::find(UChar character, unsigned start) const
487{
488 if (is8Bit())
489 return WTF::find(characters8(), m_length, character, start);
490 return WTF::find(characters16(), length(), character, start);
491}
492
493#if !CHECK_STRINGVIEW_LIFETIME
494inline void StringView::invalidate(const StringImpl&)
495{
496}
497#endif
498
499template<typename StringType> class StringTypeAdapter;
500
501template<> class StringTypeAdapter<StringView> {
502public:
503 StringTypeAdapter<StringView>(StringView string)
504 : m_string(string)
505 {
506 }
507
508 unsigned length() { return m_string.length(); }
509 bool is8Bit() { return m_string.is8Bit(); }
510 void writeTo(LChar* destination) { m_string.getCharactersWithUpconvert(destination); }
511 void writeTo(UChar* destination) { m_string.getCharactersWithUpconvert(destination); }
512
513 String toString() const { return m_string.toString(); }
514
515private:
516 StringView m_string;
517};
518
519template<typename CharacterType, size_t inlineCapacity> void append(Vector<CharacterType, inlineCapacity>& buffer, StringView string)
520{
521 unsigned oldSize = buffer.size();
522 buffer.grow(oldSize + string.length());
523 string.getCharactersWithUpconvert(buffer.data() + oldSize);
524}
525
526inline bool equal(StringView a, StringView b)
527{
528 return equalCommon(a, b);
529}
530
531inline bool equal(StringView a, const LChar* b)
532{
533 if (!b)
534 return !a.isEmpty();
535 if (a.isEmpty())
536 return !b;
537 unsigned aLength = a.length();
538 if (a.is8Bit())
539 return equal(a.characters8(), b, aLength);
540 return equal(a.characters16(), b, aLength);
541}
542
543inline bool equal(StringView a, const char* b)
544{
545 return equal(a, reinterpret_cast<const LChar*>(b));
546}
547
548inline bool equalIgnoringASCIICase(StringView a, StringView b)
549{
550 return equalIgnoringASCIICaseCommon(a, b);
551}
552
553inline bool equalIgnoringASCIICase(StringView a, const char* b)
554{
555 return equalIgnoringASCIICaseCommon(a, b);
556}
557
558class StringView::CodePoints {
559public:
560 explicit CodePoints(const StringView&);
561
562 class Iterator;
563 Iterator begin() const;
564 Iterator end() const;
565
566private:
567 StringView m_stringView;
568};
569
570class StringView::CodeUnits {
571public:
572 explicit CodeUnits(const StringView&);
573
574 class Iterator;
575 Iterator begin() const;
576 Iterator end() const;
577
578private:
579 StringView m_stringView;
580};
581
582class StringView::CodePoints::Iterator {
583public:
584 Iterator(const StringView&, unsigned index);
585
586 UChar32 operator*() const;
587 Iterator& operator++();
588
589 bool operator==(const Iterator&) const;
590 bool operator!=(const Iterator&) const;
591
592private:
593 const StringView& m_stringView;
594 mutable unsigned m_index;
595#if !ASSERT_DISABLED
596 mutable bool m_alreadyIncremented { false };
597#endif
598};
599
600class StringView::CodeUnits::Iterator {
601public:
602 Iterator(const StringView&, unsigned index);
603
604 UChar operator*() const;
605 Iterator& operator++();
606
607 bool operator==(const Iterator&) const;
608 bool operator!=(const Iterator&) const;
609
610private:
611 const StringView& m_stringView;
612 unsigned m_index;
613};
614
615inline auto StringView::codePoints() const -> CodePoints
616{
617 return CodePoints(*this);
618}
619
620inline auto StringView::codeUnits() const -> CodeUnits
621{
622 return CodeUnits(*this);
623}
624
625inline StringView::CodePoints::CodePoints(const StringView& stringView)
626 : m_stringView(stringView)
627{
628}
629
630inline StringView::CodePoints::Iterator::Iterator(const StringView& stringView, unsigned index)
631 : m_stringView(stringView)
632 , m_index(index)
633{
634}
635
636inline auto StringView::CodePoints::Iterator::operator++() -> Iterator&
637{
638#if !ASSERT_DISABLED
639 ASSERT(m_alreadyIncremented);
640 m_alreadyIncremented = false;
641#endif
642 return *this;
643}
644
645inline UChar32 StringView::CodePoints::Iterator::operator*() const
646{
647#if !ASSERT_DISABLED
648 ASSERT(!m_alreadyIncremented);
649 m_alreadyIncremented = true;
650#endif
651
652 if (m_stringView.is8Bit())
653 return m_stringView.characters8()[m_index++];
654
655 UChar32 codePoint;
656 U16_NEXT(m_stringView.characters16(), m_index, m_stringView.length(), codePoint);
657 return codePoint;
658}
659
660inline bool StringView::CodePoints::Iterator::operator==(const Iterator& other) const
661{
662 ASSERT(&m_stringView == &other.m_stringView);
663 ASSERT(!m_alreadyIncremented);
664 return m_index == other.m_index;
665}
666
667inline bool StringView::CodePoints::Iterator::operator!=(const Iterator& other) const
668{
669 return !(*this == other);
670}
671
672inline auto StringView::CodePoints::begin() const -> Iterator
673{
674 return Iterator(m_stringView, 0);
675}
676
677inline auto StringView::CodePoints::end() const -> Iterator
678{
679 return Iterator(m_stringView, m_stringView.length());
680}
681
682inline StringView::CodeUnits::CodeUnits(const StringView& stringView)
683 : m_stringView(stringView)
684{
685}
686
687inline StringView::CodeUnits::Iterator::Iterator(const StringView& stringView, unsigned index)
688 : m_stringView(stringView)
689 , m_index(index)
690{
691}
692
693inline auto StringView::CodeUnits::Iterator::operator++() -> Iterator&
694{
695 ++m_index;
696 return *this;
697}
698
699inline UChar StringView::CodeUnits::Iterator::operator*() const
700{
701 return m_stringView[m_index];
702}
703
704inline bool StringView::CodeUnits::Iterator::operator==(const Iterator& other) const
705{
706 ASSERT(&m_stringView == &other.m_stringView);
707 return m_index == other.m_index;
708}
709
710inline bool StringView::CodeUnits::Iterator::operator!=(const Iterator& other) const
711{
712 return !(*this == other);
713}
714
715inline auto StringView::CodeUnits::begin() const -> Iterator
716{
717 return Iterator(m_stringView, 0);
718}
719
720inline auto StringView::CodeUnits::end() const -> Iterator
721{
722 return Iterator(m_stringView, m_stringView.length());
723}
724
725template<unsigned length> inline bool equalLettersIgnoringASCIICase(StringView string, const char (&lowercaseLetters)[length])
726{
727 return equalLettersIgnoringASCIICaseCommon(string, lowercaseLetters);
728}
729
730} // namespace WTF
731
732using WTF::append;
733using WTF::equal;
734using WTF::StringView;
735
736#endif // StringView_h
737