1/****************************************************************************
2**
3** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4** Contact: http://www.qt-project.org/legal
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and Digia. For licensing terms and
14** conditions see http://qt.digia.com/licensing. For further information
15** use the contact form at http://qt.digia.com/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 2.1 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 2.1 requirements
23** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24**
25** In addition, as a special exception, Digia gives you certain additional
26** rights. These rights are described in the Digia Qt LGPL Exception
27** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28**
29** GNU General Public License Usage
30** Alternatively, this file may be used under the terms of the GNU
31** General Public License version 3.0 as published by the Free Software
32** Foundation and appearing in the file LICENSE.GPL included in the
33** packaging of this file. Please review the following information to
34** ensure the GNU General Public License version 3.0 requirements will be
35** met: http://www.gnu.org/copyleft/gpl.html.
36**
37**
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qplatformdefs.h"
43#include "qtextcodec.h"
44#include "qtextcodec_p.h"
45
46#ifndef QT_NO_TEXTCODEC
47
48#include "qlist.h"
49#include "qfile.h"
50#include "qvarlengtharray.h"
51#ifndef QT_NO_LIBRARY
52# include "qcoreapplication.h"
53# include "qtextcodecplugin.h"
54# include "private/qfactoryloader_p.h"
55#endif
56#include "qstringlist.h"
57
58#ifdef Q_OS_UNIX
59# include "qiconvcodec_p.h"
60#endif
61
62#include "qutfcodec_p.h"
63#include "qsimplecodec_p.h"
64#include "qlatincodec_p.h"
65#ifndef QT_NO_CODECS
66# include "qtsciicodec_p.h"
67# include "qisciicodec_p.h"
68#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
69# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) && !defined(QT_CODEC_PLUGINS)
70// no iconv(3) support, must build all codecs into the library
71# include "../../plugins/codecs/cn/qgb18030codec.h"
72# include "../../plugins/codecs/jp/qeucjpcodec.h"
73# include "../../plugins/codecs/jp/qjiscodec.h"
74# include "../../plugins/codecs/jp/qsjiscodec.h"
75# include "../../plugins/codecs/kr/qeuckrcodec.h"
76# include "../../plugins/codecs/tw/qbig5codec.h"
77# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED && !QT_CODEC_PLUGINS
78# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
79# include "qfontlaocodec_p.h"
80# include "../../plugins/codecs/jp/qfontjpcodec.h"
81# endif
82#endif // QT_NO_SYMBIAN
83#endif // QT_NO_CODECS
84#include "qlocale.h"
85#include "qmutex.h"
86#include "qhash.h"
87
88#include <stdlib.h>
89#include <ctype.h>
90#include <locale.h>
91#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
92#include <langinfo.h>
93#endif
94
95#if defined(Q_OS_WINCE)
96# define QT_NO_SETLOCALE
97#endif
98
99#ifdef Q_OS_SYMBIAN
100#include "qtextcodec_symbian.cpp"
101#endif
102
103
104// enabling this is not exception safe!
105// #define Q_DEBUG_TEXTCODEC
106
107QT_BEGIN_NAMESPACE
108
#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
// Plugin loader used by createForName() and createForMib() to look up codec
// factory plugins registered under the "/codecs" path.
Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
    (QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
#endif

// Cache for QTextCodec::codecForName and codecForMib. Keys are the requested
// name, or "MIB: <number>" for MIB lookups; ~QTextCodec() clears the cache.
typedef QHash<QByteArray, QTextCodec *> QTextCodecCache;
Q_GLOBAL_STATIC(QTextCodecCache, qTextCodecCache)
117
118
// Lower-cases an ASCII upper-case letter; every other character is returned
// unchanged. Deliberately independent of the C locale.
// (The 'register' storage class was dropped: it is ill-formed since C++17.)
static char qtolower(char c)
{
    if (c >= 'A' && c <= 'Z')
        return static_cast<char>(c + 0x20);
    return c;
}
// Returns true if c is an ASCII digit or an ASCII letter of either case.
// Deliberately independent of the C locale.
// (The 'register' storage class was dropped: it is ill-formed since C++17.)
static bool qisalnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    // Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'.
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
123
124static bool nameMatch(const QByteArray &name, const QByteArray &test)
125{
126 // if they're the same, return a perfect score
127 if (qstricmp(name, test) == 0)
128 return true;
129
130 const char *n = name.constData();
131 const char *h = test.constData();
132
133 // if the letters and numbers are the same, we have a match
134 while (*n != '\0') {
135 if (qisalnum(*n)) {
136 for (;;) {
137 if (*h == '\0')
138 return false;
139 if (qisalnum(*h))
140 break;
141 ++h;
142 }
143 if (qtolower(*n) != qtolower(*h))
144 return false;
145 ++h;
146 }
147 ++n;
148 }
149 while (*h && !qisalnum(*h))
150 ++h;
151 return (*h == '\0');
152}
153
154
155static QTextCodec *createForName(const QByteArray &name)
156{
157#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
158 QFactoryLoader *l = loader();
159 QStringList keys = l->keys();
160 for (int i = 0; i < keys.size(); ++i) {
161 if (nameMatch(name, keys.at(i).toLatin1())) {
162 QString realName = keys.at(i);
163 if (QTextCodecFactoryInterface *factory
164 = qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
165 return factory->create(realName);
166 }
167 }
168 }
169#else
170 Q_UNUSED(name);
171#endif
172 return 0;
173}
174
175static QTextCodec *createForMib(int mib)
176{
177#ifndef QT_NO_TEXTCODECPLUGIN
178 QString name = QLatin1String("MIB: ") + QString::number(mib);
179 if (QTextCodecFactoryInterface *factory
180 = qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
181 return factory->create(name);
182#else
183 Q_UNUSED(mib);
184#endif
185 return 0;
186}
187
// Registry of all existing codecs. Allocated by setup(), filled by the
// QTextCodec constructor (which prepends) and released by
// ~QTextCodecCleanup().
static QList<QTextCodec*> *all = 0;
#ifdef Q_DEBUG_TEXTCODEC
// True only while ~QTextCodecCleanup() is deleting the codecs; used to warn
// about codecs destroyed or created at unexpected times.
static bool destroying_is_ok = false;
#endif

// Codec selected by setupLocaleMapper(); reset to 0 by ~QTextCodecCleanup().
static QTextCodec *localeMapper = 0;
QTextCodec *QTextCodec::cftr = 0;
195
196
197class QTextCodecCleanup
198{
199public:
200 ~QTextCodecCleanup();
201};
202
203/*
204 Deletes all the created codecs. This destructor is called just
205 before exiting to delete any QTextCodec objects that may be lying
206 around.
207*/
208QTextCodecCleanup::~QTextCodecCleanup()
209{
210 if (!all)
211 return;
212
213#ifdef Q_DEBUG_TEXTCODEC
214 destroying_is_ok = true;
215#endif
216
217 QList<QTextCodec *> *myAll = all;
218 all = 0; // Otherwise the d'tor destroys the iterator
219 for (QList<QTextCodec *>::const_iterator it = myAll->constBegin()
220 ; it != myAll->constEnd(); ++it) {
221 delete *it;
222 }
223 delete myAll;
224 localeMapper = 0;
225
226#ifdef Q_DEBUG_TEXTCODEC
227 destroying_is_ok = false;
228#endif
229}
230
231Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
232
233bool QTextCodec::validCodecs()
234{
235#ifdef Q_OS_SYMBIAN
236 // If we don't have a trap handler, we're outside of the main() function,
237 // ie. in global constructors or destructors. Don't use codecs in this
238 // case as it would lead to crashes because we don't have a cleanup stack on Symbian
239 return (User::TrapHandler() != NULL);
240#else
241 return true;
242#endif
243}
244
245
246#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
247class QWindowsLocalCodec: public QTextCodec
248{
249public:
250 QWindowsLocalCodec();
251 ~QWindowsLocalCodec();
252
253 QString convertToUnicode(const char *, int, ConverterState *) const;
254 QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
255 QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
256
257 QByteArray name() const;
258 int mibEnum() const;
259
260};
261
262QWindowsLocalCodec::QWindowsLocalCodec()
263{
264}
265
266QWindowsLocalCodec::~QWindowsLocalCodec()
267{
268}
269
270QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const
271{
272 const char *mb = chars;
273 int mblen = length;
274
275 if (!mb || !mblen)
276 return QString();
277
278 QVarLengthArray<wchar_t, 4096> wc(4096);
279 int len;
280 QString sp;
281 bool prepend = false;
282 char state_data = 0;
283 int remainingChars = 0;
284
285 //save the current state information
286 if (state) {
287 state_data = (char)state->state_data[0];
288 remainingChars = state->remainingChars;
289 }
290
291 //convert the pending charcter (if available)
292 if (state && remainingChars) {
293 char prev[3] = {0};
294 prev[0] = state_data;
295 prev[1] = mb[0];
296 remainingChars = 0;
297 len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
298 prev, 2, wc.data(), wc.size());
299 if (len) {
300 prepend = true;
301 sp.append(QChar(wc[0]));
302 mb++;
303 mblen--;
304 wc[0] = 0;
305 }
306 }
307
308 while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
309 mb, mblen, wc.data(), wc.size()))) {
310 int r = GetLastError();
311 if (r == ERROR_INSUFFICIENT_BUFFER) {
312 const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
313 mb, mblen, 0, 0);
314 wc.resize(wclen);
315 } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
316 //find the last non NULL character
317 while (mblen > 1 && !(mb[mblen-1]))
318 mblen--;
319 //check whether, we hit an invalid character in the middle
320 if ((mblen <= 1) || (remainingChars && state_data))
321 return convertToUnicodeCharByChar(chars, length, state);
322 //Remove the last character and try again...
323 state_data = mb[mblen-1];
324 remainingChars = 1;
325 mblen--;
326 } else {
327 // Fail.
328 qWarning("MultiByteToWideChar: Cannot convert multibyte text");
329 break;
330 }
331 }
332
333 if (len <= 0)
334 return QString();
335
336 if (wc[len-1] == 0) // len - 1: we don't want terminator
337 --len;
338
339 //save the new state information
340 if (state) {
341 state->state_data[0] = (char)state_data;
342 state->remainingChars = remainingChars;
343 }
344 QString s((QChar*)wc.data(), len);
345 if (prepend) {
346 return sp+s;
347 }
348 return s;
349}
350
351QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const
352{
353 if (!chars || !length)
354 return QString();
355
356 int copyLocation = 0;
357 int extra = 2;
358 if (state && state->remainingChars) {
359 copyLocation = state->remainingChars;
360 extra += copyLocation;
361 }
362 int newLength = length + extra;
363 char *mbcs = new char[newLength];
364 //ensure that we have a NULL terminated string
365 mbcs[newLength-1] = 0;
366 mbcs[newLength-2] = 0;
367 memcpy(&(mbcs[copyLocation]), chars, length);
368 if (copyLocation) {
369 //copy the last character from the state
370 mbcs[0] = (char)state->state_data[0];
371 state->remainingChars = 0;
372 }
373 const char *mb = mbcs;
374#ifndef Q_OS_WINCE
375 const char *next = 0;
376 QString s;
377 while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
378 wchar_t wc[2] ={0};
379 int charlength = next - mb;
380 int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
381 if (len>0) {
382 s.append(QChar(wc[0]));
383 } else {
384 int r = GetLastError();
385 //check if the character being dropped is the last character
386 if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
387 state->remainingChars = 1;
388 state->state_data[0] = (char)*mb;
389 }
390 }
391 mb = next;
392 }
393#else
394 QString s;
395 int size = mbstowcs(NULL, mb, length);
396 if (size < 0) {
397 Q_ASSERT("Error in CE TextCodec");
398 return QString();
399 }
400 wchar_t* ws = new wchar_t[size + 2];
401 ws[size +1] = 0;
402 ws[size] = 0;
403 size = mbstowcs(ws, mb, length);
404 for (int i=0; i< size; i++)
405 s.append(QChar(ws[i]));
406 delete [] ws;
407#endif
408 delete [] mbcs;
409 return s;
410}
411
412QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *) const
413{
414 if (!ch)
415 return QByteArray();
416 if (uclen == 0)
417 return QByteArray("");
418 BOOL used_def;
419 QByteArray mb(4096, 0);
420 int len;
421 while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
422 mb.data(), mb.size()-1, 0, &used_def)))
423 {
424 int r = GetLastError();
425 if (r == ERROR_INSUFFICIENT_BUFFER) {
426 mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
427 (const wchar_t*)ch, uclen,
428 0, 0, 0, &used_def));
429 // and try again...
430 } else {
431#ifndef QT_NO_DEBUG
432 // Fail.
433 qWarning("WideCharToMultiByte: Cannot convert multibyte text (error %d): %s (UTF-8)",
434 r, QString(ch, uclen).toLocal8Bit().data());
435#endif
436 break;
437 }
438 }
439 mb.resize(len);
440 return mb;
441}
442
443
444QByteArray QWindowsLocalCodec::name() const
445{
446 return "System";
447}
448
449int QWindowsLocalCodec::mibEnum() const
450{
451 return 0;
452}
453
454#else
455
/* locale names mostly copied from XFree86 */

// Each table below is a 0-terminated list of locale names, searched with
// try_locale_list(). setupLocaleMapper() consults them as a last resort to
// guess a codec from a locale name that carries no codeset.

// Locales for which the "ISO 8859-2" codec is requested.
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr",
    "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro",
    "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk",
    "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 };

// Locales for which the "ISO 8859-3" codec is requested.
static const char * const iso8859_3locales[] = {
    "eo", 0 };

// Locales for which the "ISO 8859-4" codec is requested.
static const char * const iso8859_4locales[] = {
    "ee", "ee_EE", 0 };

// Locales for which the "ISO 8859-5" codec is requested.
static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU", 0 };

// Locales for which the "CP 1251" codec is requested.
static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 };

// Locales for which the "PT 154" codec is requested.
static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 };

// Locales for which the "ISO 8859-6" codec is requested.
static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic", 0 };

// Locales for which the "ISO 8859-7" codec is requested.
static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek", 0 };

// Locales for which the "ISO 8859-8-I" codec is requested.
static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };

// Locales for which the "ISO 8859-9" codec is requested.
static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish", 0 };

// Locales for which the "ISO 8859-13" codec is requested.
static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV", 0 };

// Locales for which the "ISO 8859-15" codec is requested. This table is
// checked before all the others.
static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE",
    "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR",
    "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
    "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
    0 };

// Locales for which the "KOI8-U" codec is requested.
static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian", 0 };

// Locales for which the codec named "ISO 8859-11" is requested.
static const char * const tis_620locales[] = {
    "th", "th_TH", "thai", 0 };

// Disabled table, not referenced by the code below:
// static const char * const tcvnlocales[] = {
//     "vi", "vi_VN", 0 };
510
511static bool try_locale_list(const char * const locale[], const QByteArray &lang)
512{
513 int i;
514 for(i=0; locale[i] && lang != locale[i]; i++)
515 ;
516 return locale[i] != 0;
517}
518
// The probably_koi8_rlocales need a closer look. The standard says
// these are 8859-5, but almost all Russian users use KOI8-R and
// incorrectly set $LANG to ru_RU. ru_RU_hack() checks tolower() to see
// what it thinks ru_RU means.

// If you read the history, it seems that many Russians blame ISO and
// Perestroika for the confusion.
//
// The real bug is that some programs break if the user specifies
// ru_RU.KOI8-R.

static const char * const probably_koi8_rlocales[] = {
    "ru", "ru_SU", "ru_RU", "russian", 0 };
532
533static QTextCodec * ru_RU_hack(const char * i) {
534 QTextCodec * ru_RU_codec = 0;
535
536#if !defined(QT_NO_SETLOCALE)
537 QByteArray origlocale(setlocale(LC_CTYPE, i));
538#else
539 QByteArray origlocale(i);
540#endif
541 // unicode koi8r latin5 name
542 // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
543 // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
544 int latin5 = tolower(0xCE);
545 int koi8r = tolower(0xE0);
546 if (koi8r == 0xC0 && latin5 != 0xEE) {
547 ru_RU_codec = QTextCodec::codecForName("KOI8-R");
548 } else if (koi8r != 0xC0 && latin5 == 0xEE) {
549 ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
550 } else {
551 // something else again... let's assume... *throws dice*
552 ru_RU_codec = QTextCodec::codecForName("KOI8-R");
553 qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
554 koi8r, latin5, i);
555 }
556#if !defined(QT_NO_SETLOCALE)
557 setlocale(LC_CTYPE, origlocale);
558#endif
559
560 return ru_RU_codec;
561}
562
563#endif
564
565#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
566static QTextCodec *checkForCodec(const QByteArray &name) {
567 QTextCodec *c = QTextCodec::codecForName(name);
568 if (!c) {
569 const int index = name.indexOf('@');
570 if (index != -1) {
571 c = QTextCodec::codecForName(name.left(index));
572 }
573 }
574 return c;
575}
576#endif
577
/* The next two functions are implicitly thread-safe,
   as they are only called by setup(), which runs with a mutex held.
*/
581static void setupLocaleMapper()
582{
583#ifdef Q_OS_SYMBIAN
584 localeMapper = QSymbianTextCodec::localeMapper;
585 if (localeMapper)
586 return;
587#endif
588
589#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
590 localeMapper = QTextCodec::codecForName("System");
591#else
592
593#ifndef QT_NO_ICONV
594 localeMapper = QTextCodec::codecForName("System");
595#endif
596
597#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
598 if (!localeMapper) {
599 char *charset = nl_langinfo (CODESET);
600 if (charset)
601 localeMapper = QTextCodec::codecForName(charset);
602 }
603#endif
604
605 if (!localeMapper) {
606 // Very poorly defined and followed standards causes lots of
607 // code to try to get all the cases... This logic is
608 // duplicated in QIconvCodec, so if you change it here, change
609 // it there too.
610
611 // Try to determine locale codeset from locale name assigned to
612 // LC_CTYPE category.
613
614 // First part is getting that locale name. First try setlocale() which
615 // definitely knows it, but since we cannot fully trust it, get ready
616 // to fall back to environment variables.
617#if !defined(QT_NO_SETLOCALE)
618 const QByteArray ctype = setlocale(LC_CTYPE, 0);
619#else
620 const QByteArray ctype;
621#endif
622
623 // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
624 // environment variables.
625 QByteArray lang = qgetenv("LC_ALL");
626 if (lang.isEmpty() || lang == "C") {
627 lang = qgetenv("LC_CTYPE");
628 }
629 if (lang.isEmpty() || lang == "C") {
630 lang = qgetenv("LANG");
631 }
632
633 // Now try these in order:
634 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
635 // 2. CODESET from lang if it contains a .CODESET part
636 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
637 // 4. locale (ditto)
638 // 5. check for "@euro"
639 // 6. guess locale from ctype unless ctype is "C"
640 // 7. guess locale from lang
641
642 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
643 int indexOfDot = ctype.indexOf('.');
644 if (indexOfDot != -1)
645 localeMapper = checkForCodec( ctype.mid(indexOfDot + 1) );
646
647 // 2. CODESET from lang if it contains a .CODESET part
648 if (!localeMapper) {
649 indexOfDot = lang.indexOf('.');
650 if (indexOfDot != -1)
651 localeMapper = checkForCodec( lang.mid(indexOfDot + 1) );
652 }
653
654 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
655 if (!localeMapper && !ctype.isEmpty() && ctype != "C")
656 localeMapper = checkForCodec(ctype);
657
658 // 4. locale (ditto)
659 if (!localeMapper && !lang.isEmpty())
660 localeMapper = checkForCodec(lang);
661
662 // 5. "@euro"
663 if ((!localeMapper && ctype.contains("@euro")) || lang.contains("@euro"))
664 localeMapper = checkForCodec("ISO 8859-15");
665
666 // 6. guess locale from ctype unless ctype is "C"
667 // 7. guess locale from lang
668 const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C") ? lang : ctype;
669
670 // Now do the guessing.
671 if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) {
672 if (try_locale_list(iso8859_15locales, lang))
673 localeMapper = QTextCodec::codecForName("ISO 8859-15");
674 else if (try_locale_list(iso8859_2locales, lang))
675 localeMapper = QTextCodec::codecForName("ISO 8859-2");
676 else if (try_locale_list(iso8859_3locales, lang))
677 localeMapper = QTextCodec::codecForName("ISO 8859-3");
678 else if (try_locale_list(iso8859_4locales, lang))
679 localeMapper = QTextCodec::codecForName("ISO 8859-4");
680 else if (try_locale_list(iso8859_5locales, lang))
681 localeMapper = QTextCodec::codecForName("ISO 8859-5");
682 else if (try_locale_list(iso8859_6locales, lang))
683 localeMapper = QTextCodec::codecForName("ISO 8859-6");
684 else if (try_locale_list(iso8859_7locales, lang))
685 localeMapper = QTextCodec::codecForName("ISO 8859-7");
686 else if (try_locale_list(iso8859_8locales, lang))
687 localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
688 else if (try_locale_list(iso8859_9locales, lang))
689 localeMapper = QTextCodec::codecForName("ISO 8859-9");
690 else if (try_locale_list(iso8859_13locales, lang))
691 localeMapper = QTextCodec::codecForName("ISO 8859-13");
692 else if (try_locale_list(tis_620locales, lang))
693 localeMapper = QTextCodec::codecForName("ISO 8859-11");
694 else if (try_locale_list(koi8_ulocales, lang))
695 localeMapper = QTextCodec::codecForName("KOI8-U");
696 else if (try_locale_list(cp_1251locales, lang))
697 localeMapper = QTextCodec::codecForName("CP 1251");
698 else if (try_locale_list(pt_154locales, lang))
699 localeMapper = QTextCodec::codecForName("PT 154");
700 else if (try_locale_list(probably_koi8_rlocales, lang))
701 localeMapper = ru_RU_hack(lang);
702 }
703
704 }
705
706 // If everything failed, we default to 8859-1
707 // We could perhaps default to 8859-15.
708 if (!localeMapper)
709 localeMapper = QTextCodec::codecForName("ISO 8859-1");
710#endif
711}
712
#ifndef QT_NO_THREAD
// Recursive mutex protecting the codec registry. It is taken by the
// QTextCodec constructor and destructor and by the codecForName() and
// codecForMib() lookups.
Q_GLOBAL_STATIC_WITH_ARGS(QMutex, textCodecsMutex, (QMutex::Recursive));
#endif
716
717// textCodecsMutex need to be locked to enter this function
718static void setup()
719{
720 if (all)
721 return;
722
723#ifdef Q_OS_SYMBIAN
724 // If we don't have a trap handler, we're outside of the main() function,
725 // ie. in global constructors or destructors. Don't create codecs in this
726 // case as it would lead to crashes because of a missing cleanup stack on Symbian
727 if (User::TrapHandler() == NULL)
728 return;
729#endif
730
731#ifdef Q_DEBUG_TEXTCODEC
732 if (destroying_is_ok)
733 qWarning("QTextCodec: Creating new codec during codec cleanup");
734#endif
735 all = new QList<QTextCodec*>;
736 // create the cleanup object to cleanup all codecs on exit
737 (void) createQTextCodecCleanup();
738
739#ifndef QT_NO_CODECS
740 (void)new QTsciiCodec;
741 for (int i = 0; i < 9; ++i)
742 (void)new QIsciiCodec(i);
743
744 for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
745 (void)new QSimpleTextCodec(i);
746
747#ifdef Q_OS_SYMBIAN
748 localeMapper = QSymbianTextCodec::init();
749#endif
750
751# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
752 // no font codecs when bootstrapping
753 (void)new QFontLaoCodec;
754# if defined(QT_NO_ICONV)
755 // no iconv(3) support, must build all codecs into the library
756 (void)new QFontGb2312Codec;
757 (void)new QFontGbkCodec;
758 (void)new QFontGb18030_0Codec;
759 (void)new QFontJis0208Codec;
760 (void)new QFontJis0201Codec;
761 (void)new QFontKsc5601Codec;
762 (void)new QFontBig5hkscsCodec;
763 (void)new QFontBig5Codec;
764# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
765# endif // Q_WS_X11
766
767
768#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
769# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) && !defined(QT_CODEC_PLUGINS)
770 // no asian codecs when bootstrapping, sorry
771 (void)new QGb18030Codec;
772 (void)new QGbkCodec;
773 (void)new QGb2312Codec;
774 (void)new QEucJpCodec;
775 (void)new QJisCodec;
776 (void)new QSjisCodec;
777 (void)new QEucKrCodec;
778 (void)new QCP949Codec;
779 (void)new QBig5Codec;
780 (void)new QBig5hkscsCodec;
781# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED && !QT_CODEC_PLUGINS
782#endif //Q_OS_SYMBIAN
783#endif // QT_NO_CODECS
784
785#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
786 (void) new QWindowsLocalCodec;
787#endif // Q_OS_WIN32
788
789 (void)new QUtf16Codec;
790 (void)new QUtf16BECodec;
791 (void)new QUtf16LECodec;
792 (void)new QUtf32Codec;
793 (void)new QUtf32BECodec;
794 (void)new QUtf32LECodec;
795#ifndef Q_OS_SYMBIAN
796 (void)new QLatin15Codec;
797#endif
798 (void)new QLatin1Codec;
799 (void)new QUtf8Codec;
800
801#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
802#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
803 // QIconvCodec depends on the UTF-16 codec, so it needs to be created last
804 (void) new QIconvCodec();
805#endif
806#endif
807
808 if (!localeMapper)
809 setupLocaleMapper();
810}
811
812/*!
813 \enum QTextCodec::ConversionFlag
814
815 \value DefaultConversion No flag is set.
816 \value ConvertInvalidToNull If this flag is set, each invalid input
817 character is output as a null character.
818 \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
819
820 \omitvalue FreeFunction
821*/
822
823/*!
824 \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
825
826 Constructs a ConverterState object initialized with the given \a flags.
827*/
828
829/*!
830 Destroys the ConverterState object.
831*/
832QTextCodec::ConverterState::~ConverterState()
833{
834 if (flags & FreeFunction)
835 (QTextCodecUnalignedPointer::decode(state_data))(this);
836 else if (d)
837 qFree(d);
838}
839
840/*!
841 \class QTextCodec
842 \brief The QTextCodec class provides conversions between text encodings.
843 \reentrant
844 \ingroup i18n
845
846 Qt uses Unicode to store, draw and manipulate strings. In many
847 situations you may wish to deal with data that uses a different
848 encoding. For example, most Japanese documents are still stored
849 in Shift-JIS or ISO 2022-JP, while Russian users often have their
850 documents in KOI8-R or Windows-1251.
851
852 Qt provides a set of QTextCodec classes to help with converting
853 non-Unicode formats to and from Unicode. You can also create your
854 own codec classes.
855
856 The supported encodings are:
857
858 \list
859 \o Apple Roman
860 \o \l{Big5 Text Codec}{Big5}
861 \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
862 \o CP949
863 \o \l{EUC-JP Text Codec}{EUC-JP}
864 \o \l{EUC-KR Text Codec}{EUC-KR}
865 \o \l{GBK Text Codec}{GB18030-0}
866 \o IBM 850
867 \o IBM 866
868 \o IBM 874
869 \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
870 \o ISO 8859-1 to 10
871 \o ISO 8859-13 to 16
872 \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
873 \o JIS X 0201
874 \o JIS X 0208
875 \o KOI8-R
876 \o KOI8-U
877 \o MuleLao-1
878 \o ROMAN8
879 \o \l{Shift-JIS Text Codec}{Shift-JIS}
880 \o TIS-620
881 \o \l{TSCII Text Codec}{TSCII}
882 \o UTF-8
883 \o UTF-16
884 \o UTF-16BE
885 \o UTF-16LE
886 \o UTF-32
887 \o UTF-32BE
888 \o UTF-32LE
889 \o Windows-1250 to 1258
890 \o WINSAMI2
891 \endlist
892
893 QTextCodecs can be used as follows to convert some locally encoded
894 string to Unicode. Suppose you have some string encoded in Russian
895 KOI8-R encoding, and want to convert it to Unicode. The simple way
896 to do it is like this:
897
898 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
899
900 After this, \c string holds the text converted to Unicode.
901 Converting a string from Unicode to the local encoding is just as
902 easy:
903
904 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
905
906 To read or write files in various encodings, use QTextStream and
907 its \l{QTextStream::setCodec()}{setCodec()} function. See the
908 \l{tools/codecs}{Codecs} example for an application of QTextCodec
909 to file I/O.
910
911 Some care must be taken when trying to convert the data in chunks,
912 for example, when receiving it over a network. In such cases it is
913 possible that a multi-byte character will be split over two
914 chunks. At best this might result in the loss of a character and
915 at worst cause the entire conversion to fail.
916
917 The approach to use in these situations is to create a QTextDecoder
918 object for the codec and use this QTextDecoder for the whole
919 decoding process, as shown below:
920
921 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
922
923 The QTextDecoder object maintains state between chunks and therefore
924 works correctly even if a multi-byte character is split between
925 chunks.
926
927 \section1 Creating Your Own Codec Class
928
929 Support for new text encodings can be added to Qt by creating
930 QTextCodec subclasses.
931
932 The pure virtual functions describe the encoder to the system and
933 the coder is used as required in the different text file formats
934 supported by QTextStream, and under X11, for the locale-specific
935 character input and output.
936
937 To add support for another encoding to Qt, make a subclass of
938 QTextCodec and implement the functions listed in the table below.
939
940 \table
941 \header \o Function \o Description
942
943 \row \o name()
944 \o Returns the official name for the encoding. If the
945 encoding is listed in the
946 \l{IANA character-sets encoding file}, the name
947 should be the preferred MIME name for the encoding.
948
949 \row \o aliases()
950 \o Returns a list of alternative names for the encoding.
951 QTextCodec provides a default implementation that returns
952 an empty list. For example, "ISO-8859-1" has "latin1",
953 "CP819", "IBM819", and "iso-ir-100" as aliases.
954
955 \row \o mibEnum()
            \o Returns the MIB enum for the encoding if it is listed in
957 the \l{IANA character-sets encoding file}.
958
959 \row \o convertToUnicode()
960 \o Converts an 8-bit character string to Unicode.
961
962 \row \o convertFromUnicode()
963 \o Converts a Unicode string to an 8-bit character string.
964 \endtable
965
966 You may find it more convenient to make your codec class
967 available as a plugin; see \l{How to Create Qt Plugins} for
968 details.
969
970 \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
971*/
972
973/*!
974 Constructs a QTextCodec, and gives it the highest precedence. The
975 QTextCodec should always be constructed on the heap (i.e. with \c
976 new). Qt takes ownership and will delete it when the application
977 terminates.
978*/
979QTextCodec::QTextCodec()
980{
981#ifndef QT_NO_THREAD
982 QMutexLocker locker(textCodecsMutex());
983#endif
984 setup();
985 all->prepend(this);
986}
987
988
989/*!
990 \nonreentrant
991
992 Destroys the QTextCodec. Note that you should not delete codecs
993 yourself: once created they become Qt's responsibility.
994*/
995QTextCodec::~QTextCodec()
996{
997#ifdef Q_DEBUG_TEXTCODEC
998 if (!destroying_is_ok)
999 qWarning("QTextCodec::~QTextCodec: Called by application");
1000#endif
1001 if (all) {
1002#ifndef QT_NO_THREAD
1003 QMutexLocker locker(textCodecsMutex());
1004#endif
1005 all->removeAll(this);
1006 QTextCodecCache *cache = qTextCodecCache();
1007 if (cache)
1008 cache->clear();
1009 }
1010}
1011
1012/*!
1013 \fn QTextCodec *QTextCodec::codecForName(const char *name)
1014
1015 Searches all installed QTextCodec objects and returns the one
1016 which best matches \a name; the match is case-insensitive. Returns
1017 0 if no codec matching the name \a name could be found.
1018*/
1019
1020/*!
1021 Searches all installed QTextCodec objects and returns the one
1022 which best matches \a name; the match is case-insensitive. Returns
1023 0 if no codec matching the name \a name could be found.
1024*/
1025QTextCodec *QTextCodec::codecForName(const QByteArray &name)
1026{
1027 if (name.isEmpty())
1028 return 0;
1029
1030#ifndef QT_NO_THREAD
1031 QMutexLocker locker(textCodecsMutex());
1032#endif
1033 setup();
1034
1035 if (!validCodecs())
1036 return 0;
1037
1038 QTextCodecCache *cache = qTextCodecCache();
1039 QTextCodec *codec;
1040 if (cache) {
1041 codec = cache->value(name);
1042 if (codec)
1043 return codec;
1044 }
1045
1046 for (int i = 0; i < all->size(); ++i) {
1047 QTextCodec *cursor = all->at(i);
1048 if (nameMatch(cursor->name(), name)) {
1049 if (cache)
1050 cache->insert(name, cursor);
1051 return cursor;
1052 }
1053 QList<QByteArray> aliases = cursor->aliases();
1054 for (int y = 0; y < aliases.size(); ++y)
1055 if (nameMatch(aliases.at(y), name)) {
1056 if (cache)
1057 cache->insert(name, cursor);
1058 return cursor;
1059 }
1060 }
1061
1062 codec = createForName(name);
1063 if (codec && cache)
1064 cache->insert(name, codec);
1065 return codec;
1066}
1067
1068
1069/*!
1070 Returns the QTextCodec which matches the \link
1071 QTextCodec::mibEnum() MIBenum\endlink \a mib.
1072*/
1073QTextCodec* QTextCodec::codecForMib(int mib)
1074{
1075#ifndef QT_NO_THREAD
1076 QMutexLocker locker(textCodecsMutex());
1077#endif
1078 setup();
1079
1080 if (!validCodecs())
1081 return 0;
1082
1083 QByteArray key = "MIB: " + QByteArray::number(mib);
1084 QTextCodecCache *cache = qTextCodecCache();
1085 QTextCodec *codec;
1086 if (cache) {
1087 codec = cache->value(key);
1088 if (codec)
1089 return codec;
1090 }
1091
1092 QList<QTextCodec*>::ConstIterator i;
1093 for (int i = 0; i < all->size(); ++i) {
1094 QTextCodec *cursor = all->at(i);
1095 if (cursor->mibEnum() == mib) {
1096 if (cache)
1097 cache->insert(key, cursor);
1098 return cursor;
1099 }
1100 }
1101
1102 codec = createForMib(mib);
1103
1104 // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
1105 // this correctly for compatibility.
1106 if (!codec && mib == 1000)
1107 return codecForMib(1015);
1108
1109 if (codec && cache)
1110 cache->insert(key, codec);
1111 return codec;
1112}
1113
1114/*!
1115 Returns the list of all available codecs, by name. Call
1116 QTextCodec::codecForName() to obtain the QTextCodec for the name.
1117
1118 The list may contain many mentions of the same codec
1119 if the codec has aliases.
1120
1121 \sa availableMibs(), name(), aliases()
1122*/
1123QList<QByteArray> QTextCodec::availableCodecs()
1124{
1125#ifndef QT_NO_THREAD
1126 QMutexLocker locker(textCodecsMutex());
1127#endif
1128 setup();
1129
1130 QList<QByteArray> codecs;
1131
1132 if (!validCodecs())
1133 return codecs;
1134
1135 for (int i = 0; i < all->size(); ++i) {
1136 codecs += all->at(i)->name();
1137 codecs += all->at(i)->aliases();
1138 }
1139
1140#ifndef QT_NO_THREAD
1141 locker.unlock();
1142#endif
1143
1144#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
1145 QFactoryLoader *l = loader();
1146 QStringList keys = l->keys();
1147 for (int i = 0; i < keys.size(); ++i) {
1148 if (!keys.at(i).startsWith(QLatin1String("MIB: "))) {
1149 QByteArray name = keys.at(i).toLatin1();
1150 if (!codecs.contains(name))
1151 codecs += name;
1152 }
1153 }
1154#endif
1155
1156 return codecs;
1157}
1158
1159/*!
1160 Returns the list of MIBs for all available codecs. Call
1161 QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
1162
1163 \sa availableCodecs(), mibEnum()
1164*/
1165QList<int> QTextCodec::availableMibs()
1166{
1167#ifndef QT_NO_THREAD
1168 QMutexLocker locker(textCodecsMutex());
1169#endif
1170 setup();
1171
1172 QList<int> codecs;
1173
1174 if (!validCodecs())
1175 return codecs;
1176
1177 for (int i = 0; i < all->size(); ++i)
1178 codecs += all->at(i)->mibEnum();
1179
1180#ifndef QT_NO_THREAD
1181 locker.unlock();
1182#endif
1183
1184#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
1185 QFactoryLoader *l = loader();
1186 QStringList keys = l->keys();
1187 for (int i = 0; i < keys.size(); ++i) {
1188 if (keys.at(i).startsWith(QLatin1String("MIB: "))) {
1189 int mib = keys.at(i).mid(5).toInt();
1190 if (!codecs.contains(mib))
1191 codecs += mib;
1192 }
1193 }
1194#endif
1195
1196 return codecs;
1197}
1198
1199/*!
1200 Set the codec to \a c; this will be returned by
1201 codecForLocale(). If \a c is a null pointer, the codec is reset to
1202 the default.
1203
1204 This might be needed for some applications that want to use their
1205 own mechanism for setting the locale.
1206
1207 \sa codecForLocale()
1208*/
1209void QTextCodec::setCodecForLocale(QTextCodec *c)
1210{
1211#ifndef QT_NO_THREAD
1212 QMutexLocker locker(textCodecsMutex());
1213#endif
1214 localeMapper = c;
1215 if (!localeMapper)
1216 setupLocaleMapper();
1217}
1218
1219/*!
1220 Returns a pointer to the codec most suitable for this locale.
1221
1222 On Windows, the codec will be based on a system locale. On Unix
1223 systems, starting with Qt 4.2, the codec will be using the \e
1224 iconv library. Note that in both cases the codec's name will be
1225 "System".
1226*/
1227
QTextCodec* QTextCodec::codecForLocale()
{
    // Bail out with a null codec when validCodecs() reports false.
    if (!validCodecs())
        return 0;

    // Fast path: an already-set locale codec is returned before the mutex
    // below is ever taken.
    if (localeMapper)
        return localeMapper;

#ifndef QT_NO_THREAD
    QMutexLocker locker(textCodecsMutex());
#endif
    // NOTE(review): presumably setup() is what assigns localeMapper on
    // first use -- confirm against its definition.
    setup();

    return localeMapper;
}
1243
1244
1245/*!
1246 \fn QByteArray QTextCodec::name() const
1247
1248 QTextCodec subclasses must reimplement this function. It returns
1249 the name of the encoding supported by the subclass.
1250
1251 If the codec is registered as a character set in the
1252 \l{IANA character-sets encoding file} this method should
1253 return the preferred mime name for the codec if defined,
1254 otherwise its name.
1255*/
1256
1257/*!
1258 \fn int QTextCodec::mibEnum() const
1259
1260 Subclasses of QTextCodec must reimplement this function. It
1261 returns the MIBenum (see \l{IANA character-sets encoding file}
1262 for more information). It is important that each QTextCodec
1263 subclass returns the correct unique value for this function.
1264*/
1265
1266/*!
1267 Subclasses can return a number of aliases for the codec in question.
1268
1269 Standard aliases for codecs can be found in the
1270 \l{IANA character-sets encoding file}.
1271*/
1272QList<QByteArray> QTextCodec::aliases() const
1273{
1274 return QList<QByteArray>();
1275}
1276
1277/*!
1278 \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
1279 ConverterState *state) const
1280
1281 QTextCodec subclasses must reimplement this function.
1282
1283 Converts the first \a len characters of \a chars from the
1284 encoding of the subclass to Unicode, and returns the result in a
1285 QString.
1286
1287 \a state can be 0, in which case the conversion is stateless and
1288 default conversion rules should be used. If state is not 0, the
1289 codec should save the state after the conversion in \a state, and
1290 adjust the remainingChars and invalidChars members of the struct.
1291*/
1292
1293/*!
1294 \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
1295 ConverterState *state) const
1296
1297 QTextCodec subclasses must reimplement this function.
1298
1299 Converts the first \a number of characters from the \a input array
1300 from Unicode to the encoding of the subclass, and returns the result
1301 in a QByteArray.
1302
1303 \a state can be 0 in which case the conversion is stateless and
1304 default conversion rules should be used. If state is not 0, the
1305 codec should save the state after the conversion in \a state, and
1306 adjust the remainingChars and invalidChars members of the struct.
1307*/
1308
1309/*!
1310 Creates a QTextDecoder which stores enough state to decode chunks
1311 of \c{char *} data to create chunks of Unicode data.
1312
1313 The caller is responsible for deleting the returned object.
1314*/
1315QTextDecoder* QTextCodec::makeDecoder() const
1316{
1317 return new QTextDecoder(this);
1318}
1319
1320/*!
    Creates a QTextDecoder with the specified \a flags to decode chunks
1322 of \c{char *} data to create chunks of Unicode data.
1323
1324 The caller is responsible for deleting the returned object.
1325
1326 \since 4.7
1327*/
1328QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const
1329{
1330 return new QTextDecoder(this, flags);
1331}
1332
1333
1334/*!
1335 Creates a QTextEncoder which stores enough state to encode chunks
1336 of Unicode data as \c{char *} data.
1337
1338 The caller is responsible for deleting the returned object.
1339*/
1340QTextEncoder* QTextCodec::makeEncoder() const
1341{
1342 return new QTextEncoder(this);
1343}
1344
1345/*!
    Creates a QTextEncoder with the specified \a flags to encode chunks
1347 of Unicode data as \c{char *} data.
1348
1349 The caller is responsible for deleting the returned object.
1350
1351 \since 4.7
1352*/
1353QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const
1354{
1355 return new QTextEncoder(this, flags);
1356}
1357
1358/*!
1359 \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
1360 ConverterState *state) const
1361
1362 Converts the first \a number of characters from the \a input array
1363 from Unicode to the encoding of this codec, and returns the result
1364 in a QByteArray.
1365
1366 The \a state of the convertor used is updated.
1367*/
1368
1369/*!
1370 Converts \a str from Unicode to the encoding of this codec, and
1371 returns the result in a QByteArray.
1372*/
1373QByteArray QTextCodec::fromUnicode(const QString& str) const
1374{
1375 return convertFromUnicode(str.constData(), str.length(), 0);
1376}
1377
1378/*!
1379 \fn QString QTextCodec::toUnicode(const char *input, int size,
1380 ConverterState *state) const
1381
1382 Converts the first \a size characters from the \a input from the
1383 encoding of this codec to Unicode, and returns the result in a
1384 QString.
1385
1386 The \a state of the convertor used is updated.
1387*/
1388
1389/*!
1390 Converts \a a from the encoding of this codec to Unicode, and
1391 returns the result in a QString.
1392*/
1393QString QTextCodec::toUnicode(const QByteArray& a) const
1394{
1395 return convertToUnicode(a.constData(), a.length(), 0);
1396}
1397
1398/*!
1399 Returns true if the Unicode character \a ch can be fully encoded
1400 with this codec; otherwise returns false.
1401*/
1402bool QTextCodec::canEncode(QChar ch) const
1403{
1404 ConverterState state;
1405 state.flags = ConvertInvalidToNull;
1406 convertFromUnicode(&ch, 1, &state);
1407 return (state.invalidChars == 0);
1408}
1409
1410/*!
1411 \overload
1412
1413 \a s contains the string being tested for encode-ability.
1414*/
1415bool QTextCodec::canEncode(const QString& s) const
1416{
1417 ConverterState state;
1418 state.flags = ConvertInvalidToNull;
1419 convertFromUnicode(s.constData(), s.length(), &state);
1420 return (state.invalidChars == 0);
1421}
1422
1423#ifdef QT3_SUPPORT
1424/*!
1425 Returns a string representing the current language and
1426 sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1427
1428 \sa QLocale
1429*/
1430const char *QTextCodec::locale()
1431{
1432 static char locale[6];
1433 QByteArray l = QLocale::system().name().toLatin1();
1434 int len = qMin(l.length(), 5);
1435 memcpy(locale, l.constData(), len);
1436 locale[len] = '\0';
1437
1438 return locale;
1439}
1440
1441/*!
1442 \overload
1443*/
1444
1445QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
1446{
1447 QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0);
1448 lenInOut = result.length();
1449 return result;
1450}
1451
1452/*!
1453 \overload
1454
1455 \a a contains the source characters; \a len contains the number of
1456 characters in \a a to use.
1457*/
1458QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1459{
1460 len = qMin(a.size(), len);
1461 return convertToUnicode(a.constData(), len, 0);
1462}
1463#endif
1464
1465/*!
1466 \overload
1467
1468 \a chars contains the source characters.
1469*/
1470QString QTextCodec::toUnicode(const char *chars) const
1471{
1472 int len = qstrlen(chars);
1473 return convertToUnicode(chars, len, 0);
1474}
1475
1476
1477/*!
1478 \class QTextEncoder
1479 \brief The QTextEncoder class provides a state-based encoder.
1480 \reentrant
1481 \ingroup i18n
1482
1483 A text encoder converts text from Unicode into an encoded text format
1484 using a specific codec.
1485
1486 The encoder converts Unicode into another format, remembering any
1487 state that is required between calls.
1488
1489 \sa QTextCodec::makeEncoder(), QTextDecoder
1490*/
1491
1492/*!
1493 \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)
1494
1495 Constructs a text encoder for the given \a codec.
1496*/
1497
1498/*!
1499 Constructs a text encoder for the given \a codec and conversion \a flags.
1500
1501 \since 4.7
1502*/
1503QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
1504 : c(codec), state()
1505{
1506 state.flags = flags;
1507}
1508
1509/*!
1510 Destroys the encoder.
1511*/
1512QTextEncoder::~QTextEncoder()
1513{
1514}
1515
1516/*! \internal
1517 \since 4.5
    Determines whether the encoder encountered a failure while encoding the input. If
    an error was encountered, the produced result is undefined, and gets converted according
    to the conversion flags.
1521 */
1522bool QTextEncoder::hasFailure() const
1523{
1524 return state.invalidChars != 0;
1525}
1526
1527/*!
1528 Converts the Unicode string \a str into an encoded QByteArray.
1529*/
1530QByteArray QTextEncoder::fromUnicode(const QString& str)
1531{
1532 QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
1533 return result;
1534}
1535
1536/*!
1537 \overload
1538
1539 Converts \a len characters (not bytes) from \a uc, and returns the
1540 result in a QByteArray.
1541*/
1542QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len)
1543{
1544 QByteArray result = c->fromUnicode(uc, len, &state);
1545 return result;
1546}
1547
1548#ifdef QT3_SUPPORT
1549/*!
1550 \overload
1551
1552 Converts \a lenInOut characters (not bytes) from \a uc, and returns the
1553 result in a QByteArray. The number of characters read is returned in
1554 the \a lenInOut parameter.
1555*/
1556QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1557{
1558 QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
1559 lenInOut = result.length();
1560 return result;
1561}
1562#endif
1563
1564/*!
1565 \class QTextDecoder
1566 \brief The QTextDecoder class provides a state-based decoder.
1567 \reentrant
1568 \ingroup i18n
1569
1570 A text decoder converts text from an encoded text format into Unicode
1571 using a specific codec.
1572
1573 The decoder converts text in this format into Unicode, remembering any
1574 state that is required between calls.
1575
1576 \sa QTextCodec::makeDecoder(), QTextEncoder
1577*/
1578
1579/*!
1580 \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)
1581
1582 Constructs a text decoder for the given \a codec.
1583*/
1584
1585/*!
1586 Constructs a text decoder for the given \a codec and conversion \a flags.
1587
1588 \since 4.7
1589*/
1590
1591QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
1592 : c(codec), state()
1593{
1594 state.flags = flags;
1595}
1596
1597/*!
1598 Destroys the decoder.
1599*/
1600QTextDecoder::~QTextDecoder()
1601{
1602}
1603
1604/*!
1605 \fn QString QTextDecoder::toUnicode(const char *chars, int len)
1606
1607 Converts the first \a len bytes in \a chars to Unicode, returning
1608 the result.
1609
1610 If not all characters are used (e.g. if only part of a multi-byte
1611 encoding is at the end of the characters), the decoder remembers
1612 enough state to continue with the next call to this function.
1613*/
1614QString QTextDecoder::toUnicode(const char *chars, int len)
1615{
1616 return c->toUnicode(chars, len, &state);
1617}
1618
1619
1620/*! \overload
1621
1622 The converted string is returned in \a target.
1623 */
1624void QTextDecoder::toUnicode(QString *target, const char *chars, int len)
1625{
1626 Q_ASSERT(target);
1627 switch (c->mibEnum()) {
1628 case 106: // utf8
1629 static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
1630 break;
1631 case 4: { // latin1
1632 target->resize(len);
1633 ushort *data = (ushort*)target->data();
1634 for (int i = len; i >=0; --i)
1635 data[i] = (uchar) chars[i];
1636 } break;
1637 default:
1638 *target = c->toUnicode(chars, len, &state);
1639 }
1640}
1641
1642
1643/*!
1644 \overload
1645
1646 Converts the bytes in the byte array specified by \a ba to Unicode
1647 and returns the result.
1648*/
1649QString QTextDecoder::toUnicode(const QByteArray &ba)
1650{
1651 return c->toUnicode(ba.constData(), ba.length(), &state);
1652}
1653
1654
1655/*!
1656 \fn QTextCodec* QTextCodec::codecForTr()
1657
1658 Returns the codec used by QObject::tr() on its argument. If this
1659 function returns 0 (the default), tr() assumes Latin-1.
1660
1661 \sa setCodecForTr()
1662*/
1663
1664/*!
1665 \fn void QTextCodec::setCodecForTr(QTextCodec *c)
1666 \nonreentrant
1667
1668 Sets the codec used by QObject::tr() on its argument to \a c. If
1669 \a c is 0 (the default), tr() assumes Latin-1.
1670
1671 If the literal quoted text in the program is not in the Latin-1
1672 encoding, this function can be used to set the appropriate
1673 encoding. For example, software developed by Korean programmers
1674 might use eucKR for all the text in the program, in which case the
1675 main() function might look like this:
1676
1677 \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3
1678
1679 Note that this is not the way to select the encoding that the \e
1680 user has chosen. For example, to convert an application containing
1681 literal English strings to Korean, all that is needed is for the
1682 English strings to be passed through tr() and for translation
1683 files to be loaded. For details of internationalization, see
1684 \l{Internationalization with Qt}.
1685
1686 \sa codecForTr(), setCodecForCStrings()
1687*/
1688
1689
1690/*!
1691 \fn QTextCodec* QTextCodec::codecForCStrings()
1692
1693 Returns the codec used by QString to convert to and from \c{const
1694 char *} and QByteArrays. If this function returns 0 (the default),
1695 QString assumes Latin-1.
1696
1697 \sa setCodecForCStrings()
1698*/
1699
1700/*!
1701 \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
1702 \nonreentrant
1703
1704 Sets the codec used by QString to convert to and from \c{const
1705 char *} and QByteArrays. If the \a codec is 0 (the default),
1706 QString assumes Latin-1.
1707
    \warning Some codecs do not preserve the characters in the ASCII
    range (0x00 to 0x7F). For example, the Japanese Shift-JIS
    encoding maps the backslash character (0x5C) to the Yen
    character. To avoid undesirable side-effects, we recommend
    avoiding such codecs with setCodecForCStrings().
1713
1714 \sa codecForCStrings(), setCodecForTr()
1715*/
1716
1717/*!
1718 \since 4.4
1719
1720 Tries to detect the encoding of the provided snippet of HTML in
1721 the given byte array, \a ba, by checking the BOM (Byte Order Mark)
1722 and the content-type meta header and returns a QTextCodec instance
1723 that is capable of decoding the html to unicode. If the codec
1724 cannot be detected from the content provided, \a defaultCodec is
1725 returned.
1726
1727 \sa codecForUtfText()
1728*/
1729QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
1730{
1731 // determine charset
1732 int pos;
1733 QTextCodec *c = 0;
1734
1735 c = QTextCodec::codecForUtfText(ba, c);
1736 if (!c) {
1737 QByteArray header = ba.left(512).toLower();
1738 if ((pos = header.indexOf("http-equiv=")) != -1) {
1739 if ((pos = header.lastIndexOf("meta ", pos)) != -1) {
1740 pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1741 if (pos != -1) {
1742 int pos2 = header.indexOf('\"', pos+1);
1743 QByteArray cs = header.mid(pos, pos2-pos);
1744 // qDebug("found charset: %s", cs.data());
1745 c = QTextCodec::codecForName(cs);
1746 }
1747 }
1748 }
1749 }
1750 if (!c)
1751 c = defaultCodec;
1752
1753 return c;
1754}
1755
1756/*!
1757 \overload
1758
1759 Tries to detect the encoding of the provided snippet of HTML in
1760 the given byte array, \a ba, by checking the BOM (Byte Order Mark)
1761 and the content-type meta header and returns a QTextCodec instance
1762 that is capable of decoding the html to unicode. If the codec cannot
1763 be detected, this overload returns a Latin-1 QTextCodec.
1764*/
1765QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
1766{
1767 return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
1768}
1769
1770/*!
1771 \since 4.6
1772
1773 Tries to detect the encoding of the provided snippet \a ba by
1774 using the BOM (Byte Order Mark) and returns a QTextCodec instance
1775 that is capable of decoding the text to unicode. If the codec
1776 cannot be detected from the content provided, \a defaultCodec is
1777 returned.
1778
1779 \sa codecForHtml()
1780*/
1781QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
1782{
1783 const int arraySize = ba.size();
1784
1785 if (arraySize > 3) {
1786 if ((uchar)ba[0] == 0x00
1787 && (uchar)ba[1] == 0x00
1788 && (uchar)ba[2] == 0xFE
1789 && (uchar)ba[3] == 0xFF)
1790 return QTextCodec::codecForMib(1018); // utf-32 be
1791 else if ((uchar)ba[0] == 0xFF
1792 && (uchar)ba[1] == 0xFE
1793 && (uchar)ba[2] == 0x00
1794 && (uchar)ba[3] == 0x00)
1795 return QTextCodec::codecForMib(1019); // utf-32 le
1796 }
1797
1798 if (arraySize < 2)
1799 return defaultCodec;
1800 if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
1801 return QTextCodec::codecForMib(1013); // utf16 be
1802 else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
1803 return QTextCodec::codecForMib(1014); // utf16 le
1804
1805 if (arraySize < 3)
1806 return defaultCodec;
1807 if ((uchar)ba[0] == 0xef
1808 && (uchar)ba[1] == 0xbb
1809 && (uchar)ba[2] == 0xbf)
1810 return QTextCodec::codecForMib(106); // utf-8
1811
1812 return defaultCodec;
1813}
1814
1815/*!
1816 \overload
1817
1818 Tries to detect the encoding of the provided snippet \a ba by
1819 using the BOM (Byte Order Mark) and returns a QTextCodec instance
1820 that is capable of decoding the text to unicode. If the codec
1821 cannot be detected, this overload returns a Latin-1 QTextCodec.
1822
1823 \sa codecForHtml()
1824*/
1825QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
1826{
1827 return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
1828}
1829
1830
1831/*! \internal
1832 \since 4.3
    Determines whether the decoder encountered a failure while decoding the input. If
    an error was encountered, the produced result is undefined, and gets converted according
    to the conversion flags.
1836 */
1837bool QTextDecoder::hasFailure() const
1838{
1839 return state.invalidChars != 0;
1840}
1841
1842/*!
1843 \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)
1844
1845 This functionality is no longer provided by Qt. This
1846 compatibility function always returns a null pointer.
1847*/
1848
1849/*!
1850 \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)
1851
1852 Use the codecForName(const QByteArray &) overload instead.
1853*/
1854
1855/*!
1856 \fn QTextCodec *QTextCodec::codecForIndex(int i)
1857
1858 Use availableCodecs() or availableMibs() instead and iterate
1859 through the resulting list.
1860*/
1861
1862
1863/*!
1864 \fn QByteArray QTextCodec::mimeName() const
1865
1866 Use name() instead.
1867*/
1868
1869QT_END_NAMESPACE
1870
1871#endif // QT_NO_TEXTCODEC
1872