1/***************************************************************************
2 copyright : (C) 2002 - 2008 by Scott Wheeler
3 email : wheeler@kde.org
4 ***************************************************************************/
5
6/***************************************************************************
7 * This library is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU Lesser General Public License version *
9 * 2.1 as published by the Free Software Foundation. *
10 * *
11 * This library is distributed in the hope that it will be useful, but *
12 * WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
14 * Lesser General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU Lesser General Public *
17 * License along with this library; if not, write to the Free Software *
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA *
19 * 02110-1301 USA *
20 * *
21 * Alternatively, this file is available under the Mozilla Public *
22 * License Version 1.1. You may obtain a copy of the License at *
23 * http://www.mozilla.org/MPL/ *
24 ***************************************************************************/
25
26#ifndef TAGLIB_STRING_H
27#define TAGLIB_STRING_H
28
29#include "taglib_export.h"
30#include "taglib.h"
31#include "tbytevector.h"
32
33#include <string>
34#include <iostream>
35
/*!
 * \relates TagLib::String
 *
 * Converts a QString to a TagLib::String without a requirement to link to Qt.
 *
 * The argument \a s is parenthesized in the expansion, so any expression that
 * yields a QString (for example a concatenation) may be passed. When
 * QT_VERSION is defined and is at least 0x040000 the conversion goes through
 * toUtf8(); otherwise it goes through utf8().
 *
 * \note consider conversion via usual char-by-char for loop to avoid UTF16->UTF8->UTF16
 * conversion happening in the background
 */

#if defined(QT_VERSION) && (QT_VERSION >= 0x040000)
#define QStringToTString(s) TagLib::String((s).toUtf8().data(), TagLib::String::UTF8)
#else
#define QStringToTString(s) TagLib::String((s).utf8().data(), TagLib::String::UTF8)
#endif
50
/*!
 * \relates TagLib::String
 *
 * Converts a TagLib::String to a QString without a requirement to link to Qt.
 *
 * The argument \a s is parenthesized in the expansion, so any expression that
 * yields a TagLib::String (for example a conditional expression) may be
 * passed. The string is handed to QString::fromUtf8() as the result of
 * toCString(true).
 *
 * \note consider conversion via usual char-by-char for loop to avoid UTF16->UTF8->UTF16
 * conversion happening in the background
 *
 */

#define TStringToQString(s) QString::fromUtf8((s).toCString(true))
62
63namespace TagLib {
64
65 class StringList;
66
67 //! A \e wide string class suitable for unicode.
68
69 /*!
70 * This is an implicitly shared \e wide string. For storage it uses
71 * TagLib::wstring, but as this is an <i>implementation detail</i> this of
72 * course could change. Strings are stored internally as UTF-16 (without BOM,
73 * CPU byte order).
74 *
75 * The use of implicit sharing means that copying a string is cheap, the only
76 * \e cost comes into play when the copy is modified. Prior to that the string
77 * just has a pointer to the data of the \e parent String. This also makes
78 * this class suitable as a function return type.
79 *
80 * In addition to adding implicit sharing, this class keeps track of four
81 * possible encodings, which are the four supported by the ID3v2 standard.
82 */
83
84 class TAGLIB_EXPORT String
85 {
86 public:
87
88#ifndef DO_NOT_DOCUMENT
89 typedef TagLib::wstring::iterator Iterator;
90 typedef TagLib::wstring::const_iterator ConstIterator;
91#endif
92
93 /**
94 * The four types of string encodings supported by the ID3v2 specification.
95 * ID3v1 is assumed to be Latin1 and Ogg Vorbis comments use UTF8.
96 */
97 enum Type {
98 /*!
99 * ISO-8859-1, or <i>Latin1</i> encoding. 8 bit characters.
100 */
101 Latin1 = 0,
102 /*!
103 * UTF16 with a <i>byte order mark</i>. 16 bit characters.
104 */
105 UTF16 = 1,
106 /*!
107 * UTF16 <i>big endian</i>. 16 bit characters. This is the encoding used
108 * internally by TagLib.
109 */
110 UTF16BE = 2,
111 /*!
112 * UTF8 encoding. Characters are usually 8 bits but can be up to 32.
113 */
114 UTF8 = 3,
115 /*!
116 * UTF16 <i>little endian</i>. 16 bit characters.
117 */
118 UTF16LE = 4
119 };
120
121 /*!
122 * Constructs an empty String.
123 */
124 String();
125
126 /*!
127 * Make a shallow, implicitly shared, copy of \a s. Because this is
128 * implicitly shared, this method is lightweight and suitable for
129 * pass-by-value usage.
130 */
131 String(const String &s);
132
133 /*!
134 * Makes a deep copy of the data in \a s.
135 *
136 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
137 * used with other codecs it will simply print a warning and exit.
138 */
139 String(const std::string &s, Type t = Latin1);
140
141 /*!
142 * Makes a deep copy of the data in \a s.
143 *
144 * \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
145 * of the CPU byte order. If UTF16BE, it will not be swapped. This behavior
146 * will be changed in TagLib2.0.
147 */
148 String(const wstring &s, Type t = UTF16BE);
149
150 /*!
151 * Makes a deep copy of the data in \a s.
152 *
153 * \note If \a t is UTF16LE, the byte order of \a s will be swapped regardless
154 * of the CPU byte order. If UTF16BE, it will not be swapped. This behavior
155 * will be changed in TagLib2.0.
156 */
157 String(const wchar_t *s, Type t = UTF16BE);
158
159 /*!
160 * Makes a deep copy of the data in \a c.
161 *
162 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
163 * used with other codecs it will simply print a warning and exit.
164 */
165 String(char c, Type t = Latin1);
166
167 /*!
168 * Makes a deep copy of the data in \a c.
169 */
170 String(wchar_t c, Type t = Latin1);
171
172 /*!
173 * Makes a deep copy of the data in \a s.
174 *
175 * \note This should only be used with the 8-bit codecs Latin1 and UTF8, when
176 * used with other codecs it will simply print a warning and exit.
177 */
178 String(const char *s, Type t = Latin1);
179
180 /*!
181 * Makes a deep copy of the data in \a v.
182 */
183 String(const ByteVector &v, Type t = Latin1);
184
185 /*!
186 * Destroys this String instance.
187 */
188 virtual ~String();
189
190 /*!
191 * Returns a deep copy of this String as an std::string. The returned string
192 * is encoded in UTF8 if \a unicode is true, otherwise Latin1.
193 *
194 * \see toCString()
195 */
196 std::string to8Bit(bool unicode = false) const;
197
198 /*!
199 * Returns a deep copy of this String as a wstring. The returned string is
200 * encoded in UTF-16 (without BOM/CPU byte order), not UTF-32 even if wchar_t
201 * is 32-bit wide.
202 *
203 * \see toCWString()
204 */
205 wstring toWString() const;
206
207 /*!
208 * Creates and returns a standard C-style (null-terminated) version of this
209 * String. The returned string is encoded in UTF8 if \a unicode is true,
210 * otherwise Latin1.
211 *
212 * The returned string is still owned by this String and should not be deleted
213 * by the user.
214 *
215 * The returned pointer remains valid until this String instance is destroyed
216 * or toCString() is called again.
217 *
218 * \warning This however has the side effect that the returned string will remain
219 * in memory <b>in addition to</b> other memory that is consumed by this
220 * String instance. So, this method should not be used on large strings or
221 * where memory is critical. Consider using to8Bit() instead to avoid it.
222 *
223 * \see to8Bit()
224 */
225 const char *toCString(bool unicode = false) const;
226
227 /*!
228 * Returns a standard C-style (null-terminated) wide character version of
229 * this String. The returned string is encoded in UTF-16 (without BOM/CPU byte
230 * order), not UTF-32 even if wchar_t is 32-bit wide.
231 *
232 * The returned string is still owned by this String and should not be deleted
233 * by the user.
234 *
235 * The returned pointer remains valid until this String instance is destroyed
236 * or any other method of this String is called.
237 *
238 * \note This returns a pointer to the String's internal data without any
239 * conversions.
240 *
241 * \see toWString()
242 */
243 const wchar_t *toCWString() const;
244
245 /*!
246 * Returns an iterator pointing to the beginning of the string.
247 */
248 Iterator begin();
249
250 /*!
251 * Returns a const iterator pointing to the beginning of the string.
252 */
253 ConstIterator begin() const;
254
255 /*!
256 * Returns an iterator pointing to the end of the string (the position
257 * after the last character).
258 */
259 Iterator end();
260
261 /*!
262 * Returns a const iterator pointing to the end of the string (the position
263 * after the last character).
264 */
265 ConstIterator end() const;
266
267 /*!
268 * Finds the first occurrence of pattern \a s in this string starting from
269 * \a offset. If the pattern is not found, -1 is returned.
270 */
271 int find(const String &s, int offset = 0) const;
272
273 /*!
274 * Finds the last occurrence of pattern \a s in this string, searched backwards,
275 * either from the end of the string or starting from \a offset. If the pattern
276 * is not found, -1 is returned.
277 */
278 int rfind(const String &s, int offset = -1) const;
279
280 /*!
281 * Splits the string on each occurrence of \a separator.
282 */
283 StringList split(const String &separator = " ") const;
284
285 /*!
286 * Returns true if the string starts with the substring \a s.
287 */
288 bool startsWith(const String &s) const;
289
290 /*!
291 * Extract a substring from this string starting at \a position and
292 * continuing for \a n characters.
293 */
294 String substr(unsigned int position, unsigned int n = 0xffffffff) const;
295
296 /*!
297 * Append \a s to the current string and return a reference to the current
298 * string.
299 */
300 String &append(const String &s);
301
302 /*!
303 * Clears the string.
304 */
305 String &clear();
306
307 /*!
308 * Returns an upper case version of the string.
309 *
310 * \warning This only works for the characters in US-ASCII, i.e. A-Z.
311 */
312 String upper() const;
313
314 /*!
315 * Returns the size of the string.
316 */
317 unsigned int size() const;
318
319 /*!
320 * Returns the length of the string. Equivalent to size().
321 */
322 unsigned int length() const;
323
324 /*!
325 * Returns true if the string is empty.
326 *
327 * \see isNull()
328 */
329 bool isEmpty() const;
330
331 /*!
332 * Returns true if this string is null -- i.e. it is a copy of the
333 * String::null string.
334 *
335 * \note A string can be empty and not null. So do not use this method to
336 * check if the string is empty.
337 *
338 * \see isEmpty()
339 *
340 * \deprecated
341 */
342 // BIC: remove
343 bool isNull() const;
344
345 /*!
346 * Returns a ByteVector containing the string's data. If \a t is Latin1 or
347 * UTF8, this will return a vector of 8 bit characters, otherwise it will use
348 * 16 bit characters.
349 *
350 * \note If \a t is UTF16, the returned data is encoded in little-endian
351 * format and has a BOM.
352 *
353 * \note The returned data is not null terminated.
354 */
355 ByteVector data(Type t) const;
356
357 /*!
358 * Convert the string to an integer.
359 *
360 * Returns the integer if the conversion was successful or 0 if the
361 * string does not represent a number.
362 */
363 // BIC: merge with the method below
364 int toInt() const;
365
366 /*!
367 * Convert the string to an integer.
368 *
369 * If the conversion was successful, it sets the value of \a *ok to
370 * true and returns the integer. Otherwise it sets \a *ok to false
371 * and the result is undefined.
372 */
373 int toInt(bool *ok) const;
374
375 /*!
376 * Returns a string with the leading and trailing whitespace stripped.
377 */
378 String stripWhiteSpace() const;
379
380 /*!
381 * Returns true if the string only uses characters required by Latin1.
382 */
383 bool isLatin1() const;
384
385 /*!
386 * Returns true if the string only uses characters required by (7-bit) ASCII.
387 */
388 bool isAscii() const;
389
390 /*!
391 * Converts the base-10 integer \a n to a string.
392 */
393 static String number(int n);
394
395 /*!
396 * Returns a reference to the character at position \a i.
397 */
398 wchar_t &operator[](int i);
399
400 /*!
401 * Returns a const reference to the character at position \a i.
402 */
403 const wchar_t &operator[](int i) const;
404
405 /*!
406 * Compares each character of the String with each character of \a s and
407 * returns true if the strings match.
408 */
409 bool operator==(const String &s) const;
410
411 /*!
412 * Compares each character of the String with each character of \a s and
413 * returns false if the strings match.
414 */
415 bool operator!=(const String &s) const;
416
417 /*!
418 * Compares each character of the String with each character of \a s and
419 * returns true if the strings match.
420 */
421 bool operator==(const char *s) const;
422
423 /*!
424 * Compares each character of the String with each character of \a s and
425 * returns false if the strings match.
426 */
427 bool operator!=(const char *s) const;
428
429 /*!
430 * Compares each character of the String with each character of \a s and
431 * returns true if the strings match.
432 */
433 bool operator==(const wchar_t *s) const;
434
435 /*!
436 * Compares each character of the String with each character of \a s and
437 * returns false if the strings match.
438 */
439 bool operator!=(const wchar_t *s) const;
440
441 /*!
442 * Appends \a s to the end of the String.
443 */
444 String &operator+=(const String &s);
445
446 /*!
447 * Appends \a s to the end of the String.
448 */
449 String &operator+=(const wchar_t* s);
450
451 /*!
452 * Appends \a s to the end of the String.
453 */
454 String &operator+=(const char* s);
455
456 /*!
457 * Appends \a c to the end of the String.
458 */
459 String &operator+=(wchar_t c);
460
461 /*!
462 * Appends \a c to the end of the String.
463 */
464 String &operator+=(char c);
465
466 /*!
467 * Performs a shallow, implicitly shared, copy of \a s, overwriting the
468 * String's current data.
469 */
470 String &operator=(const String &s);
471
472 /*!
473 * Performs a deep copy of the data in \a s.
474 */
475 String &operator=(const std::string &s);
476
477 /*!
478 * Performs a deep copy of the data in \a s.
479 */
480 String &operator=(const wstring &s);
481
482 /*!
483 * Performs a deep copy of the data in \a s.
484 */
485 String &operator=(const wchar_t *s);
486
487 /*!
488 * Performs a deep copy of the data in \a c.
489 */
490 String &operator=(char c);
491
492 /*!
493 * Performs a deep copy of the data in \a c.
494 */
495 String &operator=(wchar_t c);
496
497 /*!
498 * Performs a deep copy of the data in \a s.
499 */
500 String &operator=(const char *s);
501
502 /*!
503 * Performs a deep copy of the data in \a v.
504 */
505 String &operator=(const ByteVector &v);
506
507 /*!
508 * Exchanges the content of the String with the content of \a s.
509 */
510 void swap(String &s);
511
512 /*!
513 * To be able to use this class in a Map, this operator needed to be
514 * implemented. Returns true if \a s is less than this string in a byte-wise
515 * comparison.
516 */
517 bool operator<(const String &s) const;
518
519 /*!
520 * A null string provided for convenience.
521 *
522 * \warning Do not modify this variable. It will mess up the internal state
523 * of TagLib.
524 *
525 * \deprecated
526 */
527 // BIC: remove
528 static String null;
529
530 protected:
531 /*!
532 * If this String is being shared via implicit sharing, do a deep copy of the
533 * data and separate from the shared members. This should be called by all
534 * non-const subclass members.
535 */
536 void detach();
537
538 private:
539 /*!
540 * \deprecated This variable is no longer used, but NEVER remove this. It
541 * may lead to a linkage error.
542 */
543 // BIC: remove
544 static const Type WCharByteOrder;
545
546 class StringPrivate;
547 StringPrivate *d;
548 };
549}
550
551/*!
552 * \relates TagLib::String
553 *
554 * Concatenates the Strings \a s1 and \a s2 and returns the result as a new String.
555 */
556TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const TagLib::String &s2);
557
558/*!
559 * \relates TagLib::String
560 *
561 * Concatenates the C-style string \a s1 and the String \a s2 and returns the result as a new String.
562 */
563TAGLIB_EXPORT const TagLib::String operator+(const char *s1, const TagLib::String &s2);
564
565/*!
566 * \relates TagLib::String
567 *
568 * Concatenates the String \a s1 and the C-style string \a s2 and returns the result as a new String.
569 */
570TAGLIB_EXPORT const TagLib::String operator+(const TagLib::String &s1, const char *s2);
571
572
573/*!
574 * \relates TagLib::String
575 *
576 * Sends the string \a str to the output stream \a s.
577 */
578TAGLIB_EXPORT std::ostream &operator<<(std::ostream &s, const TagLib::String &str);
579
580#endif
581