1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies). |
4 | ** Contact: http://www.qt-project.org/legal |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and Digia. For licensing terms and |
14 | ** conditions see http://qt.digia.com/licensing. For further information |
15 | ** use the contact form at http://qt.digia.com/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 2.1 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 2.1 requirements |
23 | ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
24 | ** |
25 | ** In addition, as a special exception, Digia gives you certain additional |
26 | ** rights. These rights are described in the Digia Qt LGPL Exception |
27 | ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
28 | ** |
29 | ** GNU General Public License Usage |
30 | ** Alternatively, this file may be used under the terms of the GNU |
31 | ** General Public License version 3.0 as published by the Free Software |
32 | ** Foundation and appearing in the file LICENSE.GPL included in the |
33 | ** packaging of this file. Please review the following information to |
34 | ** ensure the GNU General Public License version 3.0 requirements will be |
35 | ** met: http://www.gnu.org/copyleft/gpl.html. |
36 | ** |
37 | ** |
38 | ** $QT_END_LICENSE$ |
39 | ** |
40 | ****************************************************************************/ |
41 | |
42 | #include "qplatformdefs.h" |
43 | #include "qtextcodec.h" |
44 | #include "qtextcodec_p.h" |
45 | |
46 | #ifndef QT_NO_TEXTCODEC |
47 | |
48 | #include "qlist.h" |
49 | #include "qfile.h" |
50 | #include "qvarlengtharray.h" |
51 | #ifndef QT_NO_LIBRARY |
52 | # include "qcoreapplication.h" |
53 | # include "qtextcodecplugin.h" |
54 | # include "private/qfactoryloader_p.h" |
55 | #endif |
56 | #include "qstringlist.h" |
57 | |
58 | #ifdef Q_OS_UNIX |
59 | # include "qiconvcodec_p.h" |
60 | #endif |
61 | |
62 | #include "qutfcodec_p.h" |
63 | #include "qsimplecodec_p.h" |
64 | #include "qlatincodec_p.h" |
65 | #ifndef QT_NO_CODECS |
66 | # include "qtsciicodec_p.h" |
67 | # include "qisciicodec_p.h" |
68 | #if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY) |
69 | # if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) && !defined(QT_CODEC_PLUGINS) |
70 | // no iconv(3) support, must build all codecs into the library |
71 | # include "../../plugins/codecs/cn/qgb18030codec.h" |
72 | # include "../../plugins/codecs/jp/qeucjpcodec.h" |
73 | # include "../../plugins/codecs/jp/qjiscodec.h" |
74 | # include "../../plugins/codecs/jp/qsjiscodec.h" |
75 | # include "../../plugins/codecs/kr/qeuckrcodec.h" |
76 | # include "../../plugins/codecs/tw/qbig5codec.h" |
77 | # endif // QT_NO_ICONV && !QT_BOOTSTRAPPED && !QT_CODEC_PLUGINS |
78 | # if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED) |
79 | # include "qfontlaocodec_p.h" |
80 | # include "../../plugins/codecs/jp/qfontjpcodec.h" |
81 | # endif |
82 | #endif // QT_NO_SYMBIAN |
83 | #endif // QT_NO_CODECS |
84 | #include "qlocale.h" |
85 | #include "qmutex.h" |
86 | #include "qhash.h" |
87 | |
88 | #include <stdlib.h> |
89 | #include <ctype.h> |
90 | #include <locale.h> |
91 | #if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF) |
92 | #include <langinfo.h> |
93 | #endif |
94 | |
95 | #if defined(Q_OS_WINCE) |
96 | # define QT_NO_SETLOCALE |
97 | #endif |
98 | |
99 | #ifdef Q_OS_SYMBIAN |
100 | #include "qtextcodec_symbian.cpp" |
101 | #endif |
102 | |
103 | |
104 | // enabling this is not exception safe! |
105 | // #define Q_DEBUG_TEXTCODEC |
106 | |
107 | QT_BEGIN_NAMESPACE |
108 | |
109 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN) |
110 | Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader, |
111 | (QTextCodecFactoryInterface_iid, QLatin1String("/codecs" ))) |
112 | #endif |
113 | |
114 | //Cache for QTextCodec::codecForName and codecForMib. |
115 | typedef QHash<QByteArray, QTextCodec *> QTextCodecCache; |
116 | Q_GLOBAL_STATIC(QTextCodecCache, qTextCodecCache) |
117 | |
118 | |
// Locale-independent ASCII lower-casing: maps 'A'..'Z' onto 'a'..'z' and
// returns every other character unchanged.
//
// The former 'register' storage-class specifier was dropped: it is deprecated
// since C++11 and no longer valid in C++17, and had no effect on code generation.
static char qtolower(char c)
{
    if (c >= 'A' && c <= 'Z')
        return c + 0x20;    // ASCII upper/lower case differ by 0x20
    return c;
}
// Locale-independent test for an ASCII letter or digit.
//
// Returns true for '0'..'9', 'a'..'z' and 'A'..'Z', false for anything else.
// The former 'register' storage-class specifier was dropped: it is deprecated
// since C++11 and no longer valid in C++17.
static bool qisalnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    // OR-ing in 0x20 folds an ASCII upper-case letter onto its lower-case form,
    // so a single range check covers both cases.
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
123 | |
124 | static bool nameMatch(const QByteArray &name, const QByteArray &test) |
125 | { |
126 | // if they're the same, return a perfect score |
127 | if (qstricmp(name, test) == 0) |
128 | return true; |
129 | |
130 | const char *n = name.constData(); |
131 | const char *h = test.constData(); |
132 | |
133 | // if the letters and numbers are the same, we have a match |
134 | while (*n != '\0') { |
135 | if (qisalnum(*n)) { |
136 | for (;;) { |
137 | if (*h == '\0') |
138 | return false; |
139 | if (qisalnum(*h)) |
140 | break; |
141 | ++h; |
142 | } |
143 | if (qtolower(*n) != qtolower(*h)) |
144 | return false; |
145 | ++h; |
146 | } |
147 | ++n; |
148 | } |
149 | while (*h && !qisalnum(*h)) |
150 | ++h; |
151 | return (*h == '\0'); |
152 | } |
153 | |
154 | |
155 | static QTextCodec *createForName(const QByteArray &name) |
156 | { |
157 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN) |
158 | QFactoryLoader *l = loader(); |
159 | QStringList keys = l->keys(); |
160 | for (int i = 0; i < keys.size(); ++i) { |
161 | if (nameMatch(name, keys.at(i).toLatin1())) { |
162 | QString realName = keys.at(i); |
163 | if (QTextCodecFactoryInterface *factory |
164 | = qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) { |
165 | return factory->create(realName); |
166 | } |
167 | } |
168 | } |
169 | #else |
170 | Q_UNUSED(name); |
171 | #endif |
172 | return 0; |
173 | } |
174 | |
175 | static QTextCodec *createForMib(int mib) |
176 | { |
177 | #ifndef QT_NO_TEXTCODECPLUGIN |
178 | QString name = QLatin1String("MIB: " ) + QString::number(mib); |
179 | if (QTextCodecFactoryInterface *factory |
180 | = qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name))) |
181 | return factory->create(name); |
182 | #else |
183 | Q_UNUSED(mib); |
184 | #endif |
185 | return 0; |
186 | } |
187 | |
188 | static QList<QTextCodec*> *all = 0; |
189 | #ifdef Q_DEBUG_TEXTCODEC |
190 | static bool destroying_is_ok = false; |
191 | #endif |
192 | |
193 | static QTextCodec *localeMapper = 0; |
194 | QTextCodec *QTextCodec::cftr = 0; |
195 | |
196 | |
197 | class QTextCodecCleanup |
198 | { |
199 | public: |
200 | ~QTextCodecCleanup(); |
201 | }; |
202 | |
203 | /* |
204 | Deletes all the created codecs. This destructor is called just |
205 | before exiting to delete any QTextCodec objects that may be lying |
206 | around. |
207 | */ |
208 | QTextCodecCleanup::~QTextCodecCleanup() |
209 | { |
210 | if (!all) |
211 | return; |
212 | |
213 | #ifdef Q_DEBUG_TEXTCODEC |
214 | destroying_is_ok = true; |
215 | #endif |
216 | |
217 | QList<QTextCodec *> *myAll = all; |
218 | all = 0; // Otherwise the d'tor destroys the iterator |
219 | for (QList<QTextCodec *>::const_iterator it = myAll->constBegin() |
220 | ; it != myAll->constEnd(); ++it) { |
221 | delete *it; |
222 | } |
223 | delete myAll; |
224 | localeMapper = 0; |
225 | |
226 | #ifdef Q_DEBUG_TEXTCODEC |
227 | destroying_is_ok = false; |
228 | #endif |
229 | } |
230 | |
231 | Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup) |
232 | |
233 | bool QTextCodec::validCodecs() |
234 | { |
235 | #ifdef Q_OS_SYMBIAN |
236 | // If we don't have a trap handler, we're outside of the main() function, |
237 | // ie. in global constructors or destructors. Don't use codecs in this |
238 | // case as it would lead to crashes because we don't have a cleanup stack on Symbian |
239 | return (User::TrapHandler() != NULL); |
240 | #else |
241 | return true; |
242 | #endif |
243 | } |
244 | |
245 | |
246 | #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) |
247 | class QWindowsLocalCodec: public QTextCodec |
248 | { |
249 | public: |
250 | QWindowsLocalCodec(); |
251 | ~QWindowsLocalCodec(); |
252 | |
253 | QString convertToUnicode(const char *, int, ConverterState *) const; |
254 | QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; |
255 | QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const; |
256 | |
257 | QByteArray name() const; |
258 | int mibEnum() const; |
259 | |
260 | }; |
261 | |
262 | QWindowsLocalCodec::QWindowsLocalCodec() |
263 | { |
264 | } |
265 | |
266 | QWindowsLocalCodec::~QWindowsLocalCodec() |
267 | { |
268 | } |
269 | |
270 | QString QWindowsLocalCodec::convertToUnicode(const char *chars, int length, ConverterState *state) const |
271 | { |
272 | const char *mb = chars; |
273 | int mblen = length; |
274 | |
275 | if (!mb || !mblen) |
276 | return QString(); |
277 | |
278 | QVarLengthArray<wchar_t, 4096> wc(4096); |
279 | int len; |
280 | QString sp; |
281 | bool prepend = false; |
282 | char state_data = 0; |
283 | int remainingChars = 0; |
284 | |
285 | //save the current state information |
286 | if (state) { |
287 | state_data = (char)state->state_data[0]; |
288 | remainingChars = state->remainingChars; |
289 | } |
290 | |
291 | //convert the pending charcter (if available) |
292 | if (state && remainingChars) { |
293 | char prev[3] = {0}; |
294 | prev[0] = state_data; |
295 | prev[1] = mb[0]; |
296 | remainingChars = 0; |
297 | len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, |
298 | prev, 2, wc.data(), wc.size()); |
299 | if (len) { |
300 | prepend = true; |
301 | sp.append(QChar(wc[0])); |
302 | mb++; |
303 | mblen--; |
304 | wc[0] = 0; |
305 | } |
306 | } |
307 | |
308 | while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, |
309 | mb, mblen, wc.data(), wc.size()))) { |
310 | int r = GetLastError(); |
311 | if (r == ERROR_INSUFFICIENT_BUFFER) { |
312 | const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, |
313 | mb, mblen, 0, 0); |
314 | wc.resize(wclen); |
315 | } else if (r == ERROR_NO_UNICODE_TRANSLATION) { |
316 | //find the last non NULL character |
317 | while (mblen > 1 && !(mb[mblen-1])) |
318 | mblen--; |
319 | //check whether, we hit an invalid character in the middle |
320 | if ((mblen <= 1) || (remainingChars && state_data)) |
321 | return convertToUnicodeCharByChar(chars, length, state); |
322 | //Remove the last character and try again... |
323 | state_data = mb[mblen-1]; |
324 | remainingChars = 1; |
325 | mblen--; |
326 | } else { |
327 | // Fail. |
328 | qWarning("MultiByteToWideChar: Cannot convert multibyte text" ); |
329 | break; |
330 | } |
331 | } |
332 | |
333 | if (len <= 0) |
334 | return QString(); |
335 | |
336 | if (wc[len-1] == 0) // len - 1: we don't want terminator |
337 | --len; |
338 | |
339 | //save the new state information |
340 | if (state) { |
341 | state->state_data[0] = (char)state_data; |
342 | state->remainingChars = remainingChars; |
343 | } |
344 | QString s((QChar*)wc.data(), len); |
345 | if (prepend) { |
346 | return sp+s; |
347 | } |
348 | return s; |
349 | } |
350 | |
351 | QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const |
352 | { |
353 | if (!chars || !length) |
354 | return QString(); |
355 | |
356 | int copyLocation = 0; |
357 | int extra = 2; |
358 | if (state && state->remainingChars) { |
359 | copyLocation = state->remainingChars; |
360 | extra += copyLocation; |
361 | } |
362 | int newLength = length + extra; |
363 | char *mbcs = new char[newLength]; |
364 | //ensure that we have a NULL terminated string |
365 | mbcs[newLength-1] = 0; |
366 | mbcs[newLength-2] = 0; |
367 | memcpy(&(mbcs[copyLocation]), chars, length); |
368 | if (copyLocation) { |
369 | //copy the last character from the state |
370 | mbcs[0] = (char)state->state_data[0]; |
371 | state->remainingChars = 0; |
372 | } |
373 | const char *mb = mbcs; |
374 | #ifndef Q_OS_WINCE |
375 | const char *next = 0; |
376 | QString s; |
377 | while((next = CharNextExA(CP_ACP, mb, 0)) != mb) { |
378 | wchar_t wc[2] ={0}; |
379 | int charlength = next - mb; |
380 | int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2); |
381 | if (len>0) { |
382 | s.append(QChar(wc[0])); |
383 | } else { |
384 | int r = GetLastError(); |
385 | //check if the character being dropped is the last character |
386 | if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) { |
387 | state->remainingChars = 1; |
388 | state->state_data[0] = (char)*mb; |
389 | } |
390 | } |
391 | mb = next; |
392 | } |
393 | #else |
394 | QString s; |
395 | int size = mbstowcs(NULL, mb, length); |
396 | if (size < 0) { |
397 | Q_ASSERT("Error in CE TextCodec" ); |
398 | return QString(); |
399 | } |
400 | wchar_t* ws = new wchar_t[size + 2]; |
401 | ws[size +1] = 0; |
402 | ws[size] = 0; |
403 | size = mbstowcs(ws, mb, length); |
404 | for (int i=0; i< size; i++) |
405 | s.append(QChar(ws[i])); |
406 | delete [] ws; |
407 | #endif |
408 | delete [] mbcs; |
409 | return s; |
410 | } |
411 | |
412 | QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar *ch, int uclen, ConverterState *) const |
413 | { |
414 | if (!ch) |
415 | return QByteArray(); |
416 | if (uclen == 0) |
417 | return QByteArray("" ); |
418 | BOOL used_def; |
419 | QByteArray mb(4096, 0); |
420 | int len; |
421 | while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen, |
422 | mb.data(), mb.size()-1, 0, &used_def))) |
423 | { |
424 | int r = GetLastError(); |
425 | if (r == ERROR_INSUFFICIENT_BUFFER) { |
426 | mb.resize(1+WideCharToMultiByte(CP_ACP, 0, |
427 | (const wchar_t*)ch, uclen, |
428 | 0, 0, 0, &used_def)); |
429 | // and try again... |
430 | } else { |
431 | #ifndef QT_NO_DEBUG |
432 | // Fail. |
433 | qWarning("WideCharToMultiByte: Cannot convert multibyte text (error %d): %s (UTF-8)" , |
434 | r, QString(ch, uclen).toLocal8Bit().data()); |
435 | #endif |
436 | break; |
437 | } |
438 | } |
439 | mb.resize(len); |
440 | return mb; |
441 | } |
442 | |
443 | |
444 | QByteArray QWindowsLocalCodec::name() const |
445 | { |
446 | return "System" ; |
447 | } |
448 | |
449 | int QWindowsLocalCodec::mibEnum() const |
450 | { |
451 | return 0; |
452 | } |
453 | |
454 | #else |
455 | |
/*
    Locale names mostly copied from XFree86.

    Each table is a null-terminated list of locale names; the table name says
    which encoding setupLocaleMapper() associates with those locales.
*/
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ",
    "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI",
    "slovak", "slovene", "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    0
};

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU",
    0
};

static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian",
    0
};

static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV",
    0
};

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES",
    "de", "de_AT", "de_BE", "de_DE", "de_LU",
    "en_IE",
    "es", "es_ES", "eu_ES",
    "fi", "fi_FI", "finnish",
    "fr", "fr_FR", "fr_BE", "fr_LU", "french",
    "ga_IE", "gl_ES",
    "it", "it_IT",
    "oc_FR",
    "nl", "nl_BE", "nl_NL",
    "pt", "pt_PT",
    "sv_FI", "wa_BE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};

// Disabled table, kept for reference:
// static const char * const tcvnlocales[] = {
//     "vi", "vi_VN", 0 };
510 | |
511 | static bool try_locale_list(const char * const locale[], const QByteArray &lang) |
512 | { |
513 | int i; |
514 | for(i=0; locale[i] && lang != locale[i]; i++) |
515 | ; |
516 | return locale[i] != 0; |
517 | } |
518 | |
// For the probably_koi8_rlocales we have to look. The standard says
// these are 8859-5, but almost all Russian users use KOI8-R and
// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
// it thinks ru_RU means.
523 | |
524 | // If you read the history, it seems that many Russians blame ISO and |
525 | // Perestroika for the confusion. |
526 | // |
527 | // The real bug is that some programs break if the user specifies |
528 | // ru_RU.KOI8-R. |
529 | |
530 | static const char * const probably_koi8_rlocales[] = { |
531 | "ru" , "ru_SU" , "ru_RU" , "russian" , 0 }; |
532 | |
533 | static QTextCodec * ru_RU_hack(const char * i) { |
534 | QTextCodec * ru_RU_codec = 0; |
535 | |
536 | #if !defined(QT_NO_SETLOCALE) |
537 | QByteArray origlocale(setlocale(LC_CTYPE, i)); |
538 | #else |
539 | QByteArray origlocale(i); |
540 | #endif |
541 | // unicode koi8r latin5 name |
542 | // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU |
543 | // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU |
544 | int latin5 = tolower(0xCE); |
545 | int koi8r = tolower(0xE0); |
546 | if (koi8r == 0xC0 && latin5 != 0xEE) { |
547 | ru_RU_codec = QTextCodec::codecForName("KOI8-R" ); |
548 | } else if (koi8r != 0xC0 && latin5 == 0xEE) { |
549 | ru_RU_codec = QTextCodec::codecForName("ISO 8859-5" ); |
550 | } else { |
551 | // something else again... let's assume... *throws dice* |
552 | ru_RU_codec = QTextCodec::codecForName("KOI8-R" ); |
553 | qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)" , |
554 | koi8r, latin5, i); |
555 | } |
556 | #if !defined(QT_NO_SETLOCALE) |
557 | setlocale(LC_CTYPE, origlocale); |
558 | #endif |
559 | |
560 | return ru_RU_codec; |
561 | } |
562 | |
563 | #endif |
564 | |
565 | #if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE) |
566 | static QTextCodec *checkForCodec(const QByteArray &name) { |
567 | QTextCodec *c = QTextCodec::codecForName(name); |
568 | if (!c) { |
569 | const int index = name.indexOf('@'); |
570 | if (index != -1) { |
571 | c = QTextCodec::codecForName(name.left(index)); |
572 | } |
573 | } |
574 | return c; |
575 | } |
576 | #endif |
577 | |
578 | /* the next two functions are implicitely thread safe, |
579 | as they are only called by setup() which uses a mutex. |
580 | */ |
581 | static void setupLocaleMapper() |
582 | { |
583 | #ifdef Q_OS_SYMBIAN |
584 | localeMapper = QSymbianTextCodec::localeMapper; |
585 | if (localeMapper) |
586 | return; |
587 | #endif |
588 | |
589 | #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) |
590 | localeMapper = QTextCodec::codecForName("System" ); |
591 | #else |
592 | |
593 | #ifndef QT_NO_ICONV |
594 | localeMapper = QTextCodec::codecForName("System" ); |
595 | #endif |
596 | |
597 | #if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF) |
598 | if (!localeMapper) { |
599 | char *charset = nl_langinfo (CODESET); |
600 | if (charset) |
601 | localeMapper = QTextCodec::codecForName(charset); |
602 | } |
603 | #endif |
604 | |
605 | if (!localeMapper) { |
606 | // Very poorly defined and followed standards causes lots of |
607 | // code to try to get all the cases... This logic is |
608 | // duplicated in QIconvCodec, so if you change it here, change |
609 | // it there too. |
610 | |
611 | // Try to determine locale codeset from locale name assigned to |
612 | // LC_CTYPE category. |
613 | |
614 | // First part is getting that locale name. First try setlocale() which |
615 | // definitely knows it, but since we cannot fully trust it, get ready |
616 | // to fall back to environment variables. |
617 | #if !defined(QT_NO_SETLOCALE) |
618 | const QByteArray ctype = setlocale(LC_CTYPE, 0); |
619 | #else |
620 | const QByteArray ctype; |
621 | #endif |
622 | |
623 | // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG |
624 | // environment variables. |
625 | QByteArray lang = qgetenv("LC_ALL" ); |
626 | if (lang.isEmpty() || lang == "C" ) { |
627 | lang = qgetenv("LC_CTYPE" ); |
628 | } |
629 | if (lang.isEmpty() || lang == "C" ) { |
630 | lang = qgetenv("LANG" ); |
631 | } |
632 | |
633 | // Now try these in order: |
634 | // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) |
635 | // 2. CODESET from lang if it contains a .CODESET part |
636 | // 3. ctype (maybe the locale is named "ISO-8859-1" or something) |
637 | // 4. locale (ditto) |
638 | // 5. check for "@euro" |
639 | // 6. guess locale from ctype unless ctype is "C" |
640 | // 7. guess locale from lang |
641 | |
642 | // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) |
643 | int indexOfDot = ctype.indexOf('.'); |
644 | if (indexOfDot != -1) |
645 | localeMapper = checkForCodec( ctype.mid(indexOfDot + 1) ); |
646 | |
647 | // 2. CODESET from lang if it contains a .CODESET part |
648 | if (!localeMapper) { |
649 | indexOfDot = lang.indexOf('.'); |
650 | if (indexOfDot != -1) |
651 | localeMapper = checkForCodec( lang.mid(indexOfDot + 1) ); |
652 | } |
653 | |
654 | // 3. ctype (maybe the locale is named "ISO-8859-1" or something) |
655 | if (!localeMapper && !ctype.isEmpty() && ctype != "C" ) |
656 | localeMapper = checkForCodec(ctype); |
657 | |
658 | // 4. locale (ditto) |
659 | if (!localeMapper && !lang.isEmpty()) |
660 | localeMapper = checkForCodec(lang); |
661 | |
662 | // 5. "@euro" |
663 | if ((!localeMapper && ctype.contains("@euro" )) || lang.contains("@euro" )) |
664 | localeMapper = checkForCodec("ISO 8859-15" ); |
665 | |
666 | // 6. guess locale from ctype unless ctype is "C" |
667 | // 7. guess locale from lang |
668 | const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C" ) ? lang : ctype; |
669 | |
670 | // Now do the guessing. |
671 | if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) { |
672 | if (try_locale_list(iso8859_15locales, lang)) |
673 | localeMapper = QTextCodec::codecForName("ISO 8859-15" ); |
674 | else if (try_locale_list(iso8859_2locales, lang)) |
675 | localeMapper = QTextCodec::codecForName("ISO 8859-2" ); |
676 | else if (try_locale_list(iso8859_3locales, lang)) |
677 | localeMapper = QTextCodec::codecForName("ISO 8859-3" ); |
678 | else if (try_locale_list(iso8859_4locales, lang)) |
679 | localeMapper = QTextCodec::codecForName("ISO 8859-4" ); |
680 | else if (try_locale_list(iso8859_5locales, lang)) |
681 | localeMapper = QTextCodec::codecForName("ISO 8859-5" ); |
682 | else if (try_locale_list(iso8859_6locales, lang)) |
683 | localeMapper = QTextCodec::codecForName("ISO 8859-6" ); |
684 | else if (try_locale_list(iso8859_7locales, lang)) |
685 | localeMapper = QTextCodec::codecForName("ISO 8859-7" ); |
686 | else if (try_locale_list(iso8859_8locales, lang)) |
687 | localeMapper = QTextCodec::codecForName("ISO 8859-8-I" ); |
688 | else if (try_locale_list(iso8859_9locales, lang)) |
689 | localeMapper = QTextCodec::codecForName("ISO 8859-9" ); |
690 | else if (try_locale_list(iso8859_13locales, lang)) |
691 | localeMapper = QTextCodec::codecForName("ISO 8859-13" ); |
692 | else if (try_locale_list(tis_620locales, lang)) |
693 | localeMapper = QTextCodec::codecForName("ISO 8859-11" ); |
694 | else if (try_locale_list(koi8_ulocales, lang)) |
695 | localeMapper = QTextCodec::codecForName("KOI8-U" ); |
696 | else if (try_locale_list(cp_1251locales, lang)) |
697 | localeMapper = QTextCodec::codecForName("CP 1251" ); |
698 | else if (try_locale_list(pt_154locales, lang)) |
699 | localeMapper = QTextCodec::codecForName("PT 154" ); |
700 | else if (try_locale_list(probably_koi8_rlocales, lang)) |
701 | localeMapper = ru_RU_hack(lang); |
702 | } |
703 | |
704 | } |
705 | |
706 | // If everything failed, we default to 8859-1 |
707 | // We could perhaps default to 8859-15. |
708 | if (!localeMapper) |
709 | localeMapper = QTextCodec::codecForName("ISO 8859-1" ); |
710 | #endif |
711 | } |
712 | |
713 | #ifndef QT_NO_THREAD |
714 | Q_GLOBAL_STATIC_WITH_ARGS(QMutex, textCodecsMutex, (QMutex::Recursive)); |
715 | #endif |
716 | |
717 | // textCodecsMutex need to be locked to enter this function |
718 | static void setup() |
719 | { |
720 | if (all) |
721 | return; |
722 | |
723 | #ifdef Q_OS_SYMBIAN |
724 | // If we don't have a trap handler, we're outside of the main() function, |
725 | // ie. in global constructors or destructors. Don't create codecs in this |
726 | // case as it would lead to crashes because of a missing cleanup stack on Symbian |
727 | if (User::TrapHandler() == NULL) |
728 | return; |
729 | #endif |
730 | |
731 | #ifdef Q_DEBUG_TEXTCODEC |
732 | if (destroying_is_ok) |
733 | qWarning("QTextCodec: Creating new codec during codec cleanup" ); |
734 | #endif |
735 | all = new QList<QTextCodec*>; |
736 | // create the cleanup object to cleanup all codecs on exit |
737 | (void) createQTextCodecCleanup(); |
738 | |
739 | #ifndef QT_NO_CODECS |
740 | (void)new QTsciiCodec; |
741 | for (int i = 0; i < 9; ++i) |
742 | (void)new QIsciiCodec(i); |
743 | |
744 | for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i) |
745 | (void)new QSimpleTextCodec(i); |
746 | |
747 | #ifdef Q_OS_SYMBIAN |
748 | localeMapper = QSymbianTextCodec::init(); |
749 | #endif |
750 | |
751 | # if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED) |
752 | // no font codecs when bootstrapping |
753 | (void)new QFontLaoCodec; |
754 | # if defined(QT_NO_ICONV) |
755 | // no iconv(3) support, must build all codecs into the library |
756 | (void)new QFontGb2312Codec; |
757 | (void)new QFontGbkCodec; |
758 | (void)new QFontGb18030_0Codec; |
759 | (void)new QFontJis0208Codec; |
760 | (void)new QFontJis0201Codec; |
761 | (void)new QFontKsc5601Codec; |
762 | (void)new QFontBig5hkscsCodec; |
763 | (void)new QFontBig5Codec; |
764 | # endif // QT_NO_ICONV && !QT_BOOTSTRAPPED |
765 | # endif // Q_WS_X11 |
766 | |
767 | |
768 | #if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY) |
769 | # if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) && !defined(QT_CODEC_PLUGINS) |
770 | // no asian codecs when bootstrapping, sorry |
771 | (void)new QGb18030Codec; |
772 | (void)new QGbkCodec; |
773 | (void)new QGb2312Codec; |
774 | (void)new QEucJpCodec; |
775 | (void)new QJisCodec; |
776 | (void)new QSjisCodec; |
777 | (void)new QEucKrCodec; |
778 | (void)new QCP949Codec; |
779 | (void)new QBig5Codec; |
780 | (void)new QBig5hkscsCodec; |
781 | # endif // QT_NO_ICONV && !QT_BOOTSTRAPPED && !QT_CODEC_PLUGINS |
782 | #endif //Q_OS_SYMBIAN |
783 | #endif // QT_NO_CODECS |
784 | |
785 | #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) |
786 | (void) new QWindowsLocalCodec; |
787 | #endif // Q_OS_WIN32 |
788 | |
789 | (void)new QUtf16Codec; |
790 | (void)new QUtf16BECodec; |
791 | (void)new QUtf16LECodec; |
792 | (void)new QUtf32Codec; |
793 | (void)new QUtf32BECodec; |
794 | (void)new QUtf32LECodec; |
795 | #ifndef Q_OS_SYMBIAN |
796 | (void)new QLatin15Codec; |
797 | #endif |
798 | (void)new QLatin1Codec; |
799 | (void)new QUtf8Codec; |
800 | |
801 | #if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY) |
802 | #if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) |
803 | // QIconvCodec depends on the UTF-16 codec, so it needs to be created last |
804 | (void) new QIconvCodec(); |
805 | #endif |
806 | #endif |
807 | |
808 | if (!localeMapper) |
809 | setupLocaleMapper(); |
810 | } |
811 | |
812 | /*! |
813 | \enum QTextCodec::ConversionFlag |
814 | |
815 | \value DefaultConversion No flag is set. |
816 | \value ConvertInvalidToNull If this flag is set, each invalid input |
817 | character is output as a null character. |
818 | \value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any. |
819 | |
820 | \omitvalue FreeFunction |
821 | */ |
822 | |
823 | /*! |
824 | \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags) |
825 | |
826 | Constructs a ConverterState object initialized with the given \a flags. |
827 | */ |
828 | |
829 | /*! |
830 | Destroys the ConverterState object. |
831 | */ |
832 | QTextCodec::ConverterState::~ConverterState() |
833 | { |
834 | if (flags & FreeFunction) |
835 | (QTextCodecUnalignedPointer::decode(state_data))(this); |
836 | else if (d) |
837 | qFree(d); |
838 | } |
839 | |
840 | /*! |
841 | \class QTextCodec |
842 | \brief The QTextCodec class provides conversions between text encodings. |
843 | \reentrant |
844 | \ingroup i18n |
845 | |
846 | Qt uses Unicode to store, draw and manipulate strings. In many |
847 | situations you may wish to deal with data that uses a different |
848 | encoding. For example, most Japanese documents are still stored |
849 | in Shift-JIS or ISO 2022-JP, while Russian users often have their |
850 | documents in KOI8-R or Windows-1251. |
851 | |
852 | Qt provides a set of QTextCodec classes to help with converting |
853 | non-Unicode formats to and from Unicode. You can also create your |
854 | own codec classes. |
855 | |
856 | The supported encodings are: |
857 | |
858 | \list |
859 | \o Apple Roman |
860 | \o \l{Big5 Text Codec}{Big5} |
861 | \o \l{Big5-HKSCS Text Codec}{Big5-HKSCS} |
862 | \o CP949 |
863 | \o \l{EUC-JP Text Codec}{EUC-JP} |
864 | \o \l{EUC-KR Text Codec}{EUC-KR} |
865 | \o \l{GBK Text Codec}{GB18030-0} |
866 | \o IBM 850 |
867 | \o IBM 866 |
868 | \o IBM 874 |
869 | \o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP} |
870 | \o ISO 8859-1 to 10 |
871 | \o ISO 8859-13 to 16 |
872 | \o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml |
873 | \o JIS X 0201 |
874 | \o JIS X 0208 |
875 | \o KOI8-R |
876 | \o KOI8-U |
877 | \o MuleLao-1 |
878 | \o ROMAN8 |
879 | \o \l{Shift-JIS Text Codec}{Shift-JIS} |
880 | \o TIS-620 |
881 | \o \l{TSCII Text Codec}{TSCII} |
882 | \o UTF-8 |
883 | \o UTF-16 |
884 | \o UTF-16BE |
885 | \o UTF-16LE |
886 | \o UTF-32 |
887 | \o UTF-32BE |
888 | \o UTF-32LE |
889 | \o Windows-1250 to 1258 |
890 | \o WINSAMI2 |
891 | \endlist |
892 | |
893 | QTextCodecs can be used as follows to convert some locally encoded |
894 | string to Unicode. Suppose you have some string encoded in Russian |
895 | KOI8-R encoding, and want to convert it to Unicode. The simple way |
896 | to do it is like this: |
897 | |
898 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0 |
899 | |
900 | After this, \c string holds the text converted to Unicode. |
901 | Converting a string from Unicode to the local encoding is just as |
902 | easy: |
903 | |
904 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1 |
905 | |
906 | To read or write files in various encodings, use QTextStream and |
907 | its \l{QTextStream::setCodec()}{setCodec()} function. See the |
908 | \l{tools/codecs}{Codecs} example for an application of QTextCodec |
909 | to file I/O. |
910 | |
911 | Some care must be taken when trying to convert the data in chunks, |
912 | for example, when receiving it over a network. In such cases it is |
913 | possible that a multi-byte character will be split over two |
914 | chunks. At best this might result in the loss of a character and |
915 | at worst cause the entire conversion to fail. |
916 | |
917 | The approach to use in these situations is to create a QTextDecoder |
918 | object for the codec and use this QTextDecoder for the whole |
919 | decoding process, as shown below: |
920 | |
921 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2 |
922 | |
923 | The QTextDecoder object maintains state between chunks and therefore |
924 | works correctly even if a multi-byte character is split between |
925 | chunks. |
926 | |
927 | \section1 Creating Your Own Codec Class |
928 | |
929 | Support for new text encodings can be added to Qt by creating |
930 | QTextCodec subclasses. |
931 | |
932 | The pure virtual functions describe the encoder to the system and |
933 | the coder is used as required in the different text file formats |
934 | supported by QTextStream, and under X11, for the locale-specific |
935 | character input and output. |
936 | |
937 | To add support for another encoding to Qt, make a subclass of |
938 | QTextCodec and implement the functions listed in the table below. |
939 | |
940 | \table |
941 | \header \o Function \o Description |
942 | |
943 | \row \o name() |
944 | \o Returns the official name for the encoding. If the |
945 | encoding is listed in the |
946 | \l{IANA character-sets encoding file}, the name |
947 | should be the preferred MIME name for the encoding. |
948 | |
949 | \row \o aliases() |
950 | \o Returns a list of alternative names for the encoding. |
951 | QTextCodec provides a default implementation that returns |
952 | an empty list. For example, "ISO-8859-1" has "latin1", |
953 | "CP819", "IBM819", and "iso-ir-100" as aliases. |
954 | |
955 | \row \o mibEnum() |
            \o Returns the MIB enum for the encoding if it is listed in
               the \l{IANA character-sets encoding file}.
958 | |
959 | \row \o convertToUnicode() |
960 | \o Converts an 8-bit character string to Unicode. |
961 | |
962 | \row \o convertFromUnicode() |
963 | \o Converts a Unicode string to an 8-bit character string. |
964 | \endtable |
965 | |
966 | You may find it more convenient to make your codec class |
967 | available as a plugin; see \l{How to Create Qt Plugins} for |
968 | details. |
969 | |
970 | \sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example} |
971 | */ |
972 | |
973 | /*! |
974 | Constructs a QTextCodec, and gives it the highest precedence. The |
975 | QTextCodec should always be constructed on the heap (i.e. with \c |
976 | new). Qt takes ownership and will delete it when the application |
977 | terminates. |
978 | */ |
979 | QTextCodec::QTextCodec() |
980 | { |
981 | #ifndef QT_NO_THREAD |
982 | QMutexLocker locker(textCodecsMutex()); |
983 | #endif |
984 | setup(); |
985 | all->prepend(this); |
986 | } |
987 | |
988 | |
989 | /*! |
990 | \nonreentrant |
991 | |
992 | Destroys the QTextCodec. Note that you should not delete codecs |
993 | yourself: once created they become Qt's responsibility. |
994 | */ |
995 | QTextCodec::~QTextCodec() |
996 | { |
997 | #ifdef Q_DEBUG_TEXTCODEC |
998 | if (!destroying_is_ok) |
999 | qWarning("QTextCodec::~QTextCodec: Called by application" ); |
1000 | #endif |
1001 | if (all) { |
1002 | #ifndef QT_NO_THREAD |
1003 | QMutexLocker locker(textCodecsMutex()); |
1004 | #endif |
1005 | all->removeAll(this); |
1006 | QTextCodecCache *cache = qTextCodecCache(); |
1007 | if (cache) |
1008 | cache->clear(); |
1009 | } |
1010 | } |
1011 | |
1012 | /*! |
1013 | \fn QTextCodec *QTextCodec::codecForName(const char *name) |
1014 | |
1015 | Searches all installed QTextCodec objects and returns the one |
1016 | which best matches \a name; the match is case-insensitive. Returns |
1017 | 0 if no codec matching the name \a name could be found. |
1018 | */ |
1019 | |
1020 | /*! |
1021 | Searches all installed QTextCodec objects and returns the one |
1022 | which best matches \a name; the match is case-insensitive. Returns |
1023 | 0 if no codec matching the name \a name could be found. |
1024 | */ |
1025 | QTextCodec *QTextCodec::codecForName(const QByteArray &name) |
1026 | { |
1027 | if (name.isEmpty()) |
1028 | return 0; |
1029 | |
1030 | #ifndef QT_NO_THREAD |
1031 | QMutexLocker locker(textCodecsMutex()); |
1032 | #endif |
1033 | setup(); |
1034 | |
1035 | if (!validCodecs()) |
1036 | return 0; |
1037 | |
1038 | QTextCodecCache *cache = qTextCodecCache(); |
1039 | QTextCodec *codec; |
1040 | if (cache) { |
1041 | codec = cache->value(name); |
1042 | if (codec) |
1043 | return codec; |
1044 | } |
1045 | |
1046 | for (int i = 0; i < all->size(); ++i) { |
1047 | QTextCodec *cursor = all->at(i); |
1048 | if (nameMatch(cursor->name(), name)) { |
1049 | if (cache) |
1050 | cache->insert(name, cursor); |
1051 | return cursor; |
1052 | } |
1053 | QList<QByteArray> aliases = cursor->aliases(); |
1054 | for (int y = 0; y < aliases.size(); ++y) |
1055 | if (nameMatch(aliases.at(y), name)) { |
1056 | if (cache) |
1057 | cache->insert(name, cursor); |
1058 | return cursor; |
1059 | } |
1060 | } |
1061 | |
1062 | codec = createForName(name); |
1063 | if (codec && cache) |
1064 | cache->insert(name, codec); |
1065 | return codec; |
1066 | } |
1067 | |
1068 | |
1069 | /*! |
1070 | Returns the QTextCodec which matches the \link |
1071 | QTextCodec::mibEnum() MIBenum\endlink \a mib. |
1072 | */ |
1073 | QTextCodec* QTextCodec::codecForMib(int mib) |
1074 | { |
1075 | #ifndef QT_NO_THREAD |
1076 | QMutexLocker locker(textCodecsMutex()); |
1077 | #endif |
1078 | setup(); |
1079 | |
1080 | if (!validCodecs()) |
1081 | return 0; |
1082 | |
1083 | QByteArray key = "MIB: " + QByteArray::number(mib); |
1084 | QTextCodecCache *cache = qTextCodecCache(); |
1085 | QTextCodec *codec; |
1086 | if (cache) { |
1087 | codec = cache->value(key); |
1088 | if (codec) |
1089 | return codec; |
1090 | } |
1091 | |
1092 | QList<QTextCodec*>::ConstIterator i; |
1093 | for (int i = 0; i < all->size(); ++i) { |
1094 | QTextCodec *cursor = all->at(i); |
1095 | if (cursor->mibEnum() == mib) { |
1096 | if (cache) |
1097 | cache->insert(key, cursor); |
1098 | return cursor; |
1099 | } |
1100 | } |
1101 | |
1102 | codec = createForMib(mib); |
1103 | |
1104 | // Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map |
1105 | // this correctly for compatibility. |
1106 | if (!codec && mib == 1000) |
1107 | return codecForMib(1015); |
1108 | |
1109 | if (codec && cache) |
1110 | cache->insert(key, codec); |
1111 | return codec; |
1112 | } |
1113 | |
1114 | /*! |
1115 | Returns the list of all available codecs, by name. Call |
1116 | QTextCodec::codecForName() to obtain the QTextCodec for the name. |
1117 | |
1118 | The list may contain many mentions of the same codec |
1119 | if the codec has aliases. |
1120 | |
1121 | \sa availableMibs(), name(), aliases() |
1122 | */ |
1123 | QList<QByteArray> QTextCodec::availableCodecs() |
1124 | { |
1125 | #ifndef QT_NO_THREAD |
1126 | QMutexLocker locker(textCodecsMutex()); |
1127 | #endif |
1128 | setup(); |
1129 | |
1130 | QList<QByteArray> codecs; |
1131 | |
1132 | if (!validCodecs()) |
1133 | return codecs; |
1134 | |
1135 | for (int i = 0; i < all->size(); ++i) { |
1136 | codecs += all->at(i)->name(); |
1137 | codecs += all->at(i)->aliases(); |
1138 | } |
1139 | |
1140 | #ifndef QT_NO_THREAD |
1141 | locker.unlock(); |
1142 | #endif |
1143 | |
1144 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN) |
1145 | QFactoryLoader *l = loader(); |
1146 | QStringList keys = l->keys(); |
1147 | for (int i = 0; i < keys.size(); ++i) { |
1148 | if (!keys.at(i).startsWith(QLatin1String("MIB: " ))) { |
1149 | QByteArray name = keys.at(i).toLatin1(); |
1150 | if (!codecs.contains(name)) |
1151 | codecs += name; |
1152 | } |
1153 | } |
1154 | #endif |
1155 | |
1156 | return codecs; |
1157 | } |
1158 | |
1159 | /*! |
1160 | Returns the list of MIBs for all available codecs. Call |
1161 | QTextCodec::codecForMib() to obtain the QTextCodec for the MIB. |
1162 | |
1163 | \sa availableCodecs(), mibEnum() |
1164 | */ |
1165 | QList<int> QTextCodec::availableMibs() |
1166 | { |
1167 | #ifndef QT_NO_THREAD |
1168 | QMutexLocker locker(textCodecsMutex()); |
1169 | #endif |
1170 | setup(); |
1171 | |
1172 | QList<int> codecs; |
1173 | |
1174 | if (!validCodecs()) |
1175 | return codecs; |
1176 | |
1177 | for (int i = 0; i < all->size(); ++i) |
1178 | codecs += all->at(i)->mibEnum(); |
1179 | |
1180 | #ifndef QT_NO_THREAD |
1181 | locker.unlock(); |
1182 | #endif |
1183 | |
1184 | #if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN) |
1185 | QFactoryLoader *l = loader(); |
1186 | QStringList keys = l->keys(); |
1187 | for (int i = 0; i < keys.size(); ++i) { |
1188 | if (keys.at(i).startsWith(QLatin1String("MIB: " ))) { |
1189 | int mib = keys.at(i).mid(5).toInt(); |
1190 | if (!codecs.contains(mib)) |
1191 | codecs += mib; |
1192 | } |
1193 | } |
1194 | #endif |
1195 | |
1196 | return codecs; |
1197 | } |
1198 | |
1199 | /*! |
1200 | Set the codec to \a c; this will be returned by |
1201 | codecForLocale(). If \a c is a null pointer, the codec is reset to |
1202 | the default. |
1203 | |
1204 | This might be needed for some applications that want to use their |
1205 | own mechanism for setting the locale. |
1206 | |
1207 | \sa codecForLocale() |
1208 | */ |
1209 | void QTextCodec::setCodecForLocale(QTextCodec *c) |
1210 | { |
1211 | #ifndef QT_NO_THREAD |
1212 | QMutexLocker locker(textCodecsMutex()); |
1213 | #endif |
1214 | localeMapper = c; |
1215 | if (!localeMapper) |
1216 | setupLocaleMapper(); |
1217 | } |
1218 | |
1219 | /*! |
1220 | Returns a pointer to the codec most suitable for this locale. |
1221 | |
1222 | On Windows, the codec will be based on a system locale. On Unix |
1223 | systems, starting with Qt 4.2, the codec will be using the \e |
1224 | iconv library. Note that in both cases the codec's name will be |
1225 | "System". |
1226 | */ |
1227 | |
1228 | QTextCodec* QTextCodec::codecForLocale() |
1229 | { |
1230 | if (!validCodecs()) |
1231 | return 0; |
1232 | |
1233 | if (localeMapper) |
1234 | return localeMapper; |
1235 | |
1236 | #ifndef QT_NO_THREAD |
1237 | QMutexLocker locker(textCodecsMutex()); |
1238 | #endif |
1239 | setup(); |
1240 | |
1241 | return localeMapper; |
1242 | } |
1243 | |
1244 | |
1245 | /*! |
1246 | \fn QByteArray QTextCodec::name() const |
1247 | |
1248 | QTextCodec subclasses must reimplement this function. It returns |
1249 | the name of the encoding supported by the subclass. |
1250 | |
1251 | If the codec is registered as a character set in the |
1252 | \l{IANA character-sets encoding file} this method should |
1253 | return the preferred mime name for the codec if defined, |
1254 | otherwise its name. |
1255 | */ |
1256 | |
1257 | /*! |
1258 | \fn int QTextCodec::mibEnum() const |
1259 | |
1260 | Subclasses of QTextCodec must reimplement this function. It |
1261 | returns the MIBenum (see \l{IANA character-sets encoding file} |
1262 | for more information). It is important that each QTextCodec |
1263 | subclass returns the correct unique value for this function. |
1264 | */ |
1265 | |
1266 | /*! |
1267 | Subclasses can return a number of aliases for the codec in question. |
1268 | |
1269 | Standard aliases for codecs can be found in the |
1270 | \l{IANA character-sets encoding file}. |
1271 | */ |
1272 | QList<QByteArray> QTextCodec::aliases() const |
1273 | { |
1274 | return QList<QByteArray>(); |
1275 | } |
1276 | |
1277 | /*! |
1278 | \fn QString QTextCodec::convertToUnicode(const char *chars, int len, |
1279 | ConverterState *state) const |
1280 | |
1281 | QTextCodec subclasses must reimplement this function. |
1282 | |
1283 | Converts the first \a len characters of \a chars from the |
1284 | encoding of the subclass to Unicode, and returns the result in a |
1285 | QString. |
1286 | |
1287 | \a state can be 0, in which case the conversion is stateless and |
1288 | default conversion rules should be used. If state is not 0, the |
1289 | codec should save the state after the conversion in \a state, and |
1290 | adjust the remainingChars and invalidChars members of the struct. |
1291 | */ |
1292 | |
1293 | /*! |
1294 | \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number, |
1295 | ConverterState *state) const |
1296 | |
1297 | QTextCodec subclasses must reimplement this function. |
1298 | |
1299 | Converts the first \a number of characters from the \a input array |
1300 | from Unicode to the encoding of the subclass, and returns the result |
1301 | in a QByteArray. |
1302 | |
1303 | \a state can be 0 in which case the conversion is stateless and |
1304 | default conversion rules should be used. If state is not 0, the |
1305 | codec should save the state after the conversion in \a state, and |
1306 | adjust the remainingChars and invalidChars members of the struct. |
1307 | */ |
1308 | |
1309 | /*! |
1310 | Creates a QTextDecoder which stores enough state to decode chunks |
1311 | of \c{char *} data to create chunks of Unicode data. |
1312 | |
1313 | The caller is responsible for deleting the returned object. |
1314 | */ |
1315 | QTextDecoder* QTextCodec::makeDecoder() const |
1316 | { |
1317 | return new QTextDecoder(this); |
1318 | } |
1319 | |
1320 | /*! |
    Creates a QTextDecoder with the specified \a flags to decode chunks
    of \c{char *} data to create chunks of Unicode data.
1323 | |
1324 | The caller is responsible for deleting the returned object. |
1325 | |
1326 | \since 4.7 |
1327 | */ |
1328 | QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const |
1329 | { |
1330 | return new QTextDecoder(this, flags); |
1331 | } |
1332 | |
1333 | |
1334 | /*! |
1335 | Creates a QTextEncoder which stores enough state to encode chunks |
1336 | of Unicode data as \c{char *} data. |
1337 | |
1338 | The caller is responsible for deleting the returned object. |
1339 | */ |
1340 | QTextEncoder* QTextCodec::makeEncoder() const |
1341 | { |
1342 | return new QTextEncoder(this); |
1343 | } |
1344 | |
1345 | /*! |
    Creates a QTextEncoder with the specified \a flags to encode chunks
    of Unicode data as \c{char *} data.
1348 | |
1349 | The caller is responsible for deleting the returned object. |
1350 | |
1351 | \since 4.7 |
1352 | */ |
1353 | QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const |
1354 | { |
1355 | return new QTextEncoder(this, flags); |
1356 | } |
1357 | |
1358 | /*! |
1359 | \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number, |
1360 | ConverterState *state) const |
1361 | |
1362 | Converts the first \a number of characters from the \a input array |
1363 | from Unicode to the encoding of this codec, and returns the result |
1364 | in a QByteArray. |
1365 | |
    The \a state of the converter used is updated.
1367 | */ |
1368 | |
1369 | /*! |
1370 | Converts \a str from Unicode to the encoding of this codec, and |
1371 | returns the result in a QByteArray. |
1372 | */ |
1373 | QByteArray QTextCodec::fromUnicode(const QString& str) const |
1374 | { |
1375 | return convertFromUnicode(str.constData(), str.length(), 0); |
1376 | } |
1377 | |
1378 | /*! |
1379 | \fn QString QTextCodec::toUnicode(const char *input, int size, |
1380 | ConverterState *state) const |
1381 | |
1382 | Converts the first \a size characters from the \a input from the |
1383 | encoding of this codec to Unicode, and returns the result in a |
1384 | QString. |
1385 | |
    The \a state of the converter used is updated.
1387 | */ |
1388 | |
1389 | /*! |
1390 | Converts \a a from the encoding of this codec to Unicode, and |
1391 | returns the result in a QString. |
1392 | */ |
1393 | QString QTextCodec::toUnicode(const QByteArray& a) const |
1394 | { |
1395 | return convertToUnicode(a.constData(), a.length(), 0); |
1396 | } |
1397 | |
1398 | /*! |
1399 | Returns true if the Unicode character \a ch can be fully encoded |
1400 | with this codec; otherwise returns false. |
1401 | */ |
1402 | bool QTextCodec::canEncode(QChar ch) const |
1403 | { |
1404 | ConverterState state; |
1405 | state.flags = ConvertInvalidToNull; |
1406 | convertFromUnicode(&ch, 1, &state); |
1407 | return (state.invalidChars == 0); |
1408 | } |
1409 | |
1410 | /*! |
1411 | \overload |
1412 | |
1413 | \a s contains the string being tested for encode-ability. |
1414 | */ |
1415 | bool QTextCodec::canEncode(const QString& s) const |
1416 | { |
1417 | ConverterState state; |
1418 | state.flags = ConvertInvalidToNull; |
1419 | convertFromUnicode(s.constData(), s.length(), &state); |
1420 | return (state.invalidChars == 0); |
1421 | } |
1422 | |
1423 | #ifdef QT3_SUPPORT |
1424 | /*! |
1425 | Returns a string representing the current language and |
1426 | sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil. |
1427 | |
1428 | \sa QLocale |
1429 | */ |
1430 | const char *QTextCodec::locale() |
1431 | { |
1432 | static char locale[6]; |
1433 | QByteArray l = QLocale::system().name().toLatin1(); |
1434 | int len = qMin(l.length(), 5); |
1435 | memcpy(locale, l.constData(), len); |
1436 | locale[len] = '\0'; |
1437 | |
1438 | return locale; |
1439 | } |
1440 | |
1441 | /*! |
1442 | \overload |
1443 | */ |
1444 | |
1445 | QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const |
1446 | { |
1447 | QByteArray result = convertFromUnicode(uc.constData(), lenInOut, 0); |
1448 | lenInOut = result.length(); |
1449 | return result; |
1450 | } |
1451 | |
1452 | /*! |
1453 | \overload |
1454 | |
1455 | \a a contains the source characters; \a len contains the number of |
1456 | characters in \a a to use. |
1457 | */ |
1458 | QString QTextCodec::toUnicode(const QByteArray& a, int len) const |
1459 | { |
1460 | len = qMin(a.size(), len); |
1461 | return convertToUnicode(a.constData(), len, 0); |
1462 | } |
1463 | #endif |
1464 | |
1465 | /*! |
1466 | \overload |
1467 | |
1468 | \a chars contains the source characters. |
1469 | */ |
1470 | QString QTextCodec::toUnicode(const char *chars) const |
1471 | { |
1472 | int len = qstrlen(chars); |
1473 | return convertToUnicode(chars, len, 0); |
1474 | } |
1475 | |
1476 | |
1477 | /*! |
1478 | \class QTextEncoder |
1479 | \brief The QTextEncoder class provides a state-based encoder. |
1480 | \reentrant |
1481 | \ingroup i18n |
1482 | |
1483 | A text encoder converts text from Unicode into an encoded text format |
1484 | using a specific codec. |
1485 | |
1486 | The encoder converts Unicode into another format, remembering any |
1487 | state that is required between calls. |
1488 | |
1489 | \sa QTextCodec::makeEncoder(), QTextDecoder |
1490 | */ |
1491 | |
1492 | /*! |
1493 | \fn QTextEncoder::QTextEncoder(const QTextCodec *codec) |
1494 | |
1495 | Constructs a text encoder for the given \a codec. |
1496 | */ |
1497 | |
1498 | /*! |
1499 | Constructs a text encoder for the given \a codec and conversion \a flags. |
1500 | |
1501 | \since 4.7 |
1502 | */ |
1503 | QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags) |
1504 | : c(codec), state() |
1505 | { |
1506 | state.flags = flags; |
1507 | } |
1508 | |
1509 | /*! |
1510 | Destroys the encoder. |
1511 | */ |
1512 | QTextEncoder::~QTextEncoder() |
1513 | { |
1514 | } |
1515 | |
1516 | /*! \internal |
1517 | \since 4.5 |
    Determines whether the encoder encountered a failure while encoding the input. If
    an error was encountered, the produced result is undefined, and gets converted according
    to the conversion flags.
1521 | */ |
1522 | bool QTextEncoder::hasFailure() const |
1523 | { |
1524 | return state.invalidChars != 0; |
1525 | } |
1526 | |
1527 | /*! |
1528 | Converts the Unicode string \a str into an encoded QByteArray. |
1529 | */ |
1530 | QByteArray QTextEncoder::fromUnicode(const QString& str) |
1531 | { |
1532 | QByteArray result = c->fromUnicode(str.constData(), str.length(), &state); |
1533 | return result; |
1534 | } |
1535 | |
1536 | /*! |
1537 | \overload |
1538 | |
1539 | Converts \a len characters (not bytes) from \a uc, and returns the |
1540 | result in a QByteArray. |
1541 | */ |
1542 | QByteArray QTextEncoder::fromUnicode(const QChar *uc, int len) |
1543 | { |
1544 | QByteArray result = c->fromUnicode(uc, len, &state); |
1545 | return result; |
1546 | } |
1547 | |
1548 | #ifdef QT3_SUPPORT |
1549 | /*! |
1550 | \overload |
1551 | |
1552 | Converts \a lenInOut characters (not bytes) from \a uc, and returns the |
1553 | result in a QByteArray. The number of characters read is returned in |
1554 | the \a lenInOut parameter. |
1555 | */ |
1556 | QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut) |
1557 | { |
1558 | QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state); |
1559 | lenInOut = result.length(); |
1560 | return result; |
1561 | } |
1562 | #endif |
1563 | |
1564 | /*! |
1565 | \class QTextDecoder |
1566 | \brief The QTextDecoder class provides a state-based decoder. |
1567 | \reentrant |
1568 | \ingroup i18n |
1569 | |
1570 | A text decoder converts text from an encoded text format into Unicode |
1571 | using a specific codec. |
1572 | |
1573 | The decoder converts text in this format into Unicode, remembering any |
1574 | state that is required between calls. |
1575 | |
1576 | \sa QTextCodec::makeDecoder(), QTextEncoder |
1577 | */ |
1578 | |
1579 | /*! |
1580 | \fn QTextDecoder::QTextDecoder(const QTextCodec *codec) |
1581 | |
1582 | Constructs a text decoder for the given \a codec. |
1583 | */ |
1584 | |
1585 | /*! |
1586 | Constructs a text decoder for the given \a codec and conversion \a flags. |
1587 | |
1588 | \since 4.7 |
1589 | */ |
1590 | |
1591 | QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags) |
1592 | : c(codec), state() |
1593 | { |
1594 | state.flags = flags; |
1595 | } |
1596 | |
1597 | /*! |
1598 | Destroys the decoder. |
1599 | */ |
1600 | QTextDecoder::~QTextDecoder() |
1601 | { |
1602 | } |
1603 | |
1604 | /*! |
1605 | \fn QString QTextDecoder::toUnicode(const char *chars, int len) |
1606 | |
1607 | Converts the first \a len bytes in \a chars to Unicode, returning |
1608 | the result. |
1609 | |
1610 | If not all characters are used (e.g. if only part of a multi-byte |
1611 | encoding is at the end of the characters), the decoder remembers |
1612 | enough state to continue with the next call to this function. |
1613 | */ |
1614 | QString QTextDecoder::toUnicode(const char *chars, int len) |
1615 | { |
1616 | return c->toUnicode(chars, len, &state); |
1617 | } |
1618 | |
1619 | |
1620 | /*! \overload |
1621 | |
1622 | The converted string is returned in \a target. |
1623 | */ |
1624 | void QTextDecoder::toUnicode(QString *target, const char *chars, int len) |
1625 | { |
1626 | Q_ASSERT(target); |
1627 | switch (c->mibEnum()) { |
1628 | case 106: // utf8 |
1629 | static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state); |
1630 | break; |
1631 | case 4: { // latin1 |
1632 | target->resize(len); |
1633 | ushort *data = (ushort*)target->data(); |
1634 | for (int i = len; i >=0; --i) |
1635 | data[i] = (uchar) chars[i]; |
1636 | } break; |
1637 | default: |
1638 | *target = c->toUnicode(chars, len, &state); |
1639 | } |
1640 | } |
1641 | |
1642 | |
1643 | /*! |
1644 | \overload |
1645 | |
1646 | Converts the bytes in the byte array specified by \a ba to Unicode |
1647 | and returns the result. |
1648 | */ |
1649 | QString QTextDecoder::toUnicode(const QByteArray &ba) |
1650 | { |
1651 | return c->toUnicode(ba.constData(), ba.length(), &state); |
1652 | } |
1653 | |
1654 | |
1655 | /*! |
1656 | \fn QTextCodec* QTextCodec::codecForTr() |
1657 | |
1658 | Returns the codec used by QObject::tr() on its argument. If this |
1659 | function returns 0 (the default), tr() assumes Latin-1. |
1660 | |
1661 | \sa setCodecForTr() |
1662 | */ |
1663 | |
1664 | /*! |
1665 | \fn void QTextCodec::setCodecForTr(QTextCodec *c) |
1666 | \nonreentrant |
1667 | |
1668 | Sets the codec used by QObject::tr() on its argument to \a c. If |
1669 | \a c is 0 (the default), tr() assumes Latin-1. |
1670 | |
1671 | If the literal quoted text in the program is not in the Latin-1 |
1672 | encoding, this function can be used to set the appropriate |
1673 | encoding. For example, software developed by Korean programmers |
1674 | might use eucKR for all the text in the program, in which case the |
1675 | main() function might look like this: |
1676 | |
1677 | \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3 |
1678 | |
1679 | Note that this is not the way to select the encoding that the \e |
1680 | user has chosen. For example, to convert an application containing |
1681 | literal English strings to Korean, all that is needed is for the |
1682 | English strings to be passed through tr() and for translation |
1683 | files to be loaded. For details of internationalization, see |
1684 | \l{Internationalization with Qt}. |
1685 | |
1686 | \sa codecForTr(), setCodecForCStrings() |
1687 | */ |
1688 | |
1689 | |
1690 | /*! |
1691 | \fn QTextCodec* QTextCodec::codecForCStrings() |
1692 | |
1693 | Returns the codec used by QString to convert to and from \c{const |
1694 | char *} and QByteArrays. If this function returns 0 (the default), |
1695 | QString assumes Latin-1. |
1696 | |
1697 | \sa setCodecForCStrings() |
1698 | */ |
1699 | |
1700 | /*! |
1701 | \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec) |
1702 | \nonreentrant |
1703 | |
1704 | Sets the codec used by QString to convert to and from \c{const |
1705 | char *} and QByteArrays. If the \a codec is 0 (the default), |
1706 | QString assumes Latin-1. |
1707 | |
    \warning Some codecs do not preserve the characters in the ASCII
    range (0x00 to 0x7F). For example, the Japanese Shift-JIS
    encoding maps the backslash character (0x5C) to the Yen
    character. To avoid undesirable side-effects, we recommend
    avoiding such codecs with setCodecForCStrings().
1713 | |
1714 | \sa codecForCStrings(), setCodecForTr() |
1715 | */ |
1716 | |
1717 | /*! |
1718 | \since 4.4 |
1719 | |
1720 | Tries to detect the encoding of the provided snippet of HTML in |
1721 | the given byte array, \a ba, by checking the BOM (Byte Order Mark) |
1722 | and the content-type meta header and returns a QTextCodec instance |
1723 | that is capable of decoding the html to unicode. If the codec |
1724 | cannot be detected from the content provided, \a defaultCodec is |
1725 | returned. |
1726 | |
1727 | \sa codecForUtfText() |
1728 | */ |
1729 | QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec) |
1730 | { |
1731 | // determine charset |
1732 | int pos; |
1733 | QTextCodec *c = 0; |
1734 | |
1735 | c = QTextCodec::codecForUtfText(ba, c); |
1736 | if (!c) { |
1737 | QByteArray = ba.left(512).toLower(); |
1738 | if ((pos = header.indexOf("http-equiv=" )) != -1) { |
1739 | if ((pos = header.lastIndexOf("meta " , pos)) != -1) { |
1740 | pos = header.indexOf("charset=" , pos) + int(strlen("charset=" )); |
1741 | if (pos != -1) { |
1742 | int pos2 = header.indexOf('\"', pos+1); |
1743 | QByteArray cs = header.mid(pos, pos2-pos); |
1744 | // qDebug("found charset: %s", cs.data()); |
1745 | c = QTextCodec::codecForName(cs); |
1746 | } |
1747 | } |
1748 | } |
1749 | } |
1750 | if (!c) |
1751 | c = defaultCodec; |
1752 | |
1753 | return c; |
1754 | } |
1755 | |
1756 | /*! |
1757 | \overload |
1758 | |
1759 | Tries to detect the encoding of the provided snippet of HTML in |
1760 | the given byte array, \a ba, by checking the BOM (Byte Order Mark) |
1761 | and the content-type meta header and returns a QTextCodec instance |
1762 | that is capable of decoding the html to unicode. If the codec cannot |
1763 | be detected, this overload returns a Latin-1 QTextCodec. |
1764 | */ |
1765 | QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba) |
1766 | { |
1767 | return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); |
1768 | } |
1769 | |
1770 | /*! |
1771 | \since 4.6 |
1772 | |
1773 | Tries to detect the encoding of the provided snippet \a ba by |
1774 | using the BOM (Byte Order Mark) and returns a QTextCodec instance |
1775 | that is capable of decoding the text to unicode. If the codec |
1776 | cannot be detected from the content provided, \a defaultCodec is |
1777 | returned. |
1778 | |
1779 | \sa codecForHtml() |
1780 | */ |
1781 | QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec) |
1782 | { |
1783 | const int arraySize = ba.size(); |
1784 | |
1785 | if (arraySize > 3) { |
1786 | if ((uchar)ba[0] == 0x00 |
1787 | && (uchar)ba[1] == 0x00 |
1788 | && (uchar)ba[2] == 0xFE |
1789 | && (uchar)ba[3] == 0xFF) |
1790 | return QTextCodec::codecForMib(1018); // utf-32 be |
1791 | else if ((uchar)ba[0] == 0xFF |
1792 | && (uchar)ba[1] == 0xFE |
1793 | && (uchar)ba[2] == 0x00 |
1794 | && (uchar)ba[3] == 0x00) |
1795 | return QTextCodec::codecForMib(1019); // utf-32 le |
1796 | } |
1797 | |
1798 | if (arraySize < 2) |
1799 | return defaultCodec; |
1800 | if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) |
1801 | return QTextCodec::codecForMib(1013); // utf16 be |
1802 | else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe) |
1803 | return QTextCodec::codecForMib(1014); // utf16 le |
1804 | |
1805 | if (arraySize < 3) |
1806 | return defaultCodec; |
1807 | if ((uchar)ba[0] == 0xef |
1808 | && (uchar)ba[1] == 0xbb |
1809 | && (uchar)ba[2] == 0xbf) |
1810 | return QTextCodec::codecForMib(106); // utf-8 |
1811 | |
1812 | return defaultCodec; |
1813 | } |
1814 | |
1815 | /*! |
1816 | \overload |
1817 | |
1818 | Tries to detect the encoding of the provided snippet \a ba by |
1819 | using the BOM (Byte Order Mark) and returns a QTextCodec instance |
1820 | that is capable of decoding the text to unicode. If the codec |
1821 | cannot be detected, this overload returns a Latin-1 QTextCodec. |
1822 | |
1823 | \sa codecForHtml() |
1824 | */ |
1825 | QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba) |
1826 | { |
1827 | return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); |
1828 | } |
1829 | |
1830 | |
1831 | /*! \internal |
1832 | \since 4.3 |
    Determines whether the decoder encountered a failure while decoding the input. If
    an error was encountered, the produced result is undefined, and gets converted according
    to the conversion flags.
1836 | */ |
1837 | bool QTextDecoder::hasFailure() const |
1838 | { |
1839 | return state.invalidChars != 0; |
1840 | } |
1841 | |
1842 | /*! |
1843 | \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size) |
1844 | |
1845 | This functionality is no longer provided by Qt. This |
1846 | compatibility function always returns a null pointer. |
1847 | */ |
1848 | |
1849 | /*! |
1850 | \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy) |
1851 | |
1852 | Use the codecForName(const QByteArray &) overload instead. |
1853 | */ |
1854 | |
1855 | /*! |
1856 | \fn QTextCodec *QTextCodec::codecForIndex(int i) |
1857 | |
1858 | Use availableCodecs() or availableMibs() instead and iterate |
1859 | through the resulting list. |
1860 | */ |
1861 | |
1862 | |
1863 | /*! |
1864 | \fn QByteArray QTextCodec::mimeName() const |
1865 | |
1866 | Use name() instead. |
1867 | */ |
1868 | |
1869 | QT_END_NAMESPACE |
1870 | |
1871 | #endif // QT_NO_TEXTCODEC |
1872 | |