qtextcodec.cpp [qt4/src/corelib/codecs/qtextcodec.cpp]

1	/****************************************************************************
2	**
3	** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4	** Contact: http://www.qt-project.org/legal
5	**
6	** This file is part of the QtCore module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial License Usage
10	** Licensees holding valid commercial Qt licenses may use this file in
11	** accordance with the commercial license agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and Digia. For licensing terms and
14	** conditions see http://qt.digia.com/licensing. For further information
15	** use the contact form at http://qt.digia.com/contact-us.
16	**
17	** GNU Lesser General Public License Usage
18	** Alternatively, this file may be used under the terms of the GNU Lesser
19	** General Public License version 2.1 as published by the Free Software
20	** Foundation and appearing in the file LICENSE.LGPL included in the
21	** packaging of this file. Please review the following information to
22	** ensure the GNU Lesser General Public License version 2.1 requirements
23	** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24	**
25	** In addition, as a special exception, Digia gives you certain additional
26	** rights. These rights are described in the Digia Qt LGPL Exception
27	** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28	**
29	** GNU General Public License Usage
30	** Alternatively, this file may be used under the terms of the GNU
31	** General Public License version 3.0 as published by the Free Software
32	** Foundation and appearing in the file LICENSE.GPL included in the
33	** packaging of this file. Please review the following information to
34	** ensure the GNU General Public License version 3.0 requirements will be
35	** met: http://www.gnu.org/copyleft/gpl.html.
36	**
37	**
38	** $QT_END_LICENSE$
39	**
40	****************************************************************************/
41
42	#include "qplatformdefs.h"
43	#include "qtextcodec.h"
44	#include "qtextcodec_p.h"
45
46	#ifndef QT_NO_TEXTCODEC
47
48	#include "qlist.h"
49	#include "qfile.h"
50	#include "qvarlengtharray.h"
51	#ifndef QT_NO_LIBRARY
52	# include "qcoreapplication.h"
53	# include "qtextcodecplugin.h"
54	# include "private/qfactoryloader_p.h"
55	#endif
56	#include "qstringlist.h"
57
58	#ifdef Q_OS_UNIX
59	# include "qiconvcodec_p.h"
60	#endif
61
62	#include "qutfcodec_p.h"
63	#include "qsimplecodec_p.h"
64	#include "qlatincodec_p.h"
65	#ifndef QT_NO_CODECS
66	# include "qtsciicodec_p.h"
67	# include "qisciicodec_p.h"
68	#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
69	# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) && !defined(QT_CODEC_PLUGINS)
70	// no iconv(3) support, must build all codecs into the library
71	# include "../../plugins/codecs/cn/qgb18030codec.h"
72	# include "../../plugins/codecs/jp/qeucjpcodec.h"
73	# include "../../plugins/codecs/jp/qjiscodec.h"
74	# include "../../plugins/codecs/jp/qsjiscodec.h"
75	# include "../../plugins/codecs/kr/qeuckrcodec.h"
76	# include "../../plugins/codecs/tw/qbig5codec.h"
77	# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED && !QT_CODEC_PLUGINS
78	# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
79	# include "qfontlaocodec_p.h"
80	# include "../../plugins/codecs/jp/qfontjpcodec.h"
81	# endif
82	#endif // QT_NO_SYMBIAN
83	#endif // QT_NO_CODECS
84	#include "qlocale.h"
85	#include "qmutex.h"
86	#include "qhash.h"
87
88	#include <stdlib.h>
89	#include <ctype.h>
90	#include <locale.h>
91	#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
92	#include <langinfo.h>
93	#endif
94
95	#if defined(Q_OS_WINCE)
96	# define QT_NO_SETLOCALE
97	#endif
98
99	#ifdef Q_OS_SYMBIAN
100	#include "qtextcodec_symbian.cpp"
101	#endif
102
103
104	// enabling this is not exception safe!
105	// #define Q_DEBUG_TEXTCODEC
106
107	QT_BEGIN_NAMESPACE
108
109	#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
// Declares loader(), the accessor for the global QFactoryLoader used to look
// up codec plugins. The loader is constructed with the codec factory
// interface id and the "/codecs" string as its two arguments.
Q_GLOBAL_STATIC_WITH_ARGS(QFactoryLoader, loader,
    (QTextCodecFactoryInterface_iid, QLatin1String ("/codecs")))
112	#endif
113
// Cache for QTextCodec::codecForName and codecForMib. It maps the lookup key
// (the requested name, or a "MIB: <number>" string) to the codec that was
// found for it. The cache is cleared whenever a QTextCodec is destroyed.
typedef QHash<QByteArray, QTextCodec *> QTextCodecCache;
Q_GLOBAL_STATIC(QTextCodecCache, qTextCodecCache)
117
118
// Locale-independent lower-casing: maps 'A'..'Z' to 'a'..'z' and returns
// every other character unchanged.
static char qtolower(char c)
{
    if (c >= 'A' && c <= 'Z')
        return static_cast<char>(c + 0x20);
    return c;
}
// Locale-independent test for '0'..'9', 'a'..'z' and 'A'..'Z'.
// OR-ing with 0x20 folds an upper-case letter onto its lower-case twin, so a
// single range check covers both cases.
static bool qisalnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
123
124	static bool nameMatch(const QByteArray &name, const QByteArray &test)
125	{
126	// if they're the same, return a perfect score
127	if (qstricmp(name, test) == `0`)
128	return true;
129
130	const char *n = name.constData();
131	const char *h = test.constData();
132
133	// if the letters and numbers are the same, we have a match
134	while (*n != '\0') {
135	if (qisalnum(*n)) {
136	for (;;) {
137	if (*h == '\0')
138	return false;
139	if (qisalnum(*h))
140	break;
141	++h;
142	}
143	if (qtolower(n) != qtolower(h))
144	return false;
145	++h;
146	}
147	++n;
148	}
149	while (h && !qisalnum(h))
150	++h;
151	return (*h == '\0');
152	}
153
154
155	static QTextCodec createForName(const* QByteArray &name)
156	{
157	#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
158	QFactoryLoader *l = loader();
159	QStringList keys = l->keys();
160	for (int i = `0`; i < keys.size(); ++i) {
161	if (nameMatch(name, keys.at(i).toLatin1())) {
162	QString realName = keys.at(i);
163	if (QTextCodecFactoryInterface *factory
164	= qobject_cast<QTextCodecFactoryInterface*>(l->instance(realName))) {
165	return factory->create(realName);
166	}
167	}
168	}
169	#else
170	Q_UNUSED(name);
171	#endif
172	return `0`;
173	}
174
175	static QTextCodec createForMib(int* mib)
176	{
177	#ifndef QT_NO_TEXTCODECPLUGIN
178	QString name = QLatin1String ("MIB: ") + QString::number(mib);
179	if (QTextCodecFactoryInterface *factory
180	= qobject_cast<QTextCodecFactoryInterface*>(loader()->instance(name)))
181	return factory->create(name);
182	#else
183	Q_UNUSED(mib);
184	#endif
185	return `0`;
186	}
187
188	static QList<QTextCodec> all = `0`;
189	#ifdef Q_DEBUG_TEXTCODEC
190	static bool destroying_is_ok = false;
191	#endif
192
193	static QTextCodec *localeMapper = `0`;
194	QTextCodec *QTextCodec::cftr = `0`;
195
196
// Helper whose only job is its destructor, which deletes all registered
// codecs. A single global instance is created by setup().
class QTextCodecCleanup
{
public:
    ~QTextCodecCleanup();
};
202
203	/*
204	Deletes all the created codecs. This destructor is called just
205	before exiting to delete any QTextCodec objects that may be lying
206	around.
207	*/
208	QTextCodecCleanup::~QTextCodecCleanup()
209	{
210	if (!all)
211	return;
212
213	#ifdef Q_DEBUG_TEXTCODEC
214	destroying_is_ok = true;
215	#endif
216
217	QList<QTextCodec > myAll = all;
218	all = `0`; // Otherwise the d'tor destroys the iterator
219	for (QList<QTextCodec *>::const_iterator it = myAll->constBegin()
220	; it != myAll->constEnd(); ++it) {
221	delete *it;
222	}
223	delete myAll;
224	localeMapper = `0`;
225
226	#ifdef Q_DEBUG_TEXTCODEC
227	destroying_is_ok = false;
228	#endif
229	}
230
// Declares createQTextCodecCleanup(), the accessor for the global
// QTextCodecCleanup instance; setup() calls it once the codec list exists.
Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
232
233	bool QTextCodec::validCodecs()
234	{
235	#ifdef Q_OS_SYMBIAN
236	// If we don't have a trap handler, we're outside of the main() function,
237	// ie. in global constructors or destructors. Don't use codecs in this
238	// case as it would lead to crashes because we don't have a cleanup stack on Symbian
239	return (User::TrapHandler() != NULL);
240	#else
241	return true;
242	#endif
243	}
244
245
246	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
247	class QWindowsLocalCodec: public QTextCodec
248	{
249	public:
250	QWindowsLocalCodec();
251	~QWindowsLocalCodec();
252
253	QString convertToUnicode(const char , int, ConverterState ) const;
254	QByteArray convertFromUnicode(const QChar , int, ConverterState ) const;
255	QString convertToUnicodeCharByChar(const char chars, int* length, ConverterState state) const*;
256
257	QByteArray name() const;
258	int mibEnum() const;
259
260	};
261
262	QWindowsLocalCodec::QWindowsLocalCodec()
263	{
264	}
265
266	QWindowsLocalCodec::~QWindowsLocalCodec()
267	{
268	}
269
270	QString QWindowsLocalCodec::convertToUnicode(const char chars, int* length, ConverterState state) const*
271	{
272	const char *mb = chars;
273	int mblen = length;
274
275	if (!mb \|\| !mblen)
276	return QString();
277
278	QVarLengthArray<wchar_t, `4096`> wc(`4096`);
279	int len;
280	QString sp;
281	bool prepend = false;
282	char state_data = `0`;
283	int remainingChars = `0`;
284
285	//save the current state information
286	if (state) {
287	state_data = (char)state->state_data[`0`];
288	remainingChars = state->remainingChars;
289	}
290
291	//convert the pending charcter (if available)
292	if (state && remainingChars) {
293	char prev[`3`] = {`0`};
294	prev[`0`] = state_data;
295	prev[`1`] = mb[`0`];
296	remainingChars = `0`;
297	len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
298	prev, `2`, wc.data(), wc.size());
299	if (len) {
300	prepend = true;
301	sp.append(QChar(wc[`0`]));
302	mb++;
303	mblen--;
304	wc[`0`] = `0`;
305	}
306	}
307
308	while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED\|MB_ERR_INVALID_CHARS,
309	mb, mblen, wc.data(), wc.size()))) {
310	int r = GetLastError();
311	if (r == ERROR_INSUFFICIENT_BUFFER) {
312	const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
313	mb, mblen, `0`, `0`);
314	wc.resize(wclen);
315	} else if (r == ERROR_NO_UNICODE_TRANSLATION) {
316	//find the last non NULL character
317	while (mblen > `1` && !(mb[mblen-`1`]))
318	mblen--;
319	//check whether, we hit an invalid character in the middle
320	if ((mblen <= `1`) \|\| (remainingChars && state_data))
321	return convertToUnicodeCharByChar(chars, length, state);
322	//Remove the last character and try again...
323	state_data = mb[mblen-`1`];
324	remainingChars = `1`;
325	mblen--;
326	} else {
327	// Fail.
328	qWarning("MultiByteToWideChar: Cannot convert multibyte text");
329	break;
330	}
331	}
332
333	if (len <= `0`)
334	return QString();
335
336	if (wc[len-`1`] == `0`) // len - 1: we don't want terminator
337	--len;
338
339	//save the new state information
340	if (state) {
341	state->state_data[`0`] = (char)state_data;
342	state->remainingChars = remainingChars;
343	}
344	QString s((QChar*)wc.data(), len);
345	if (prepend) {
346	return sp+s;
347	}
348	return s;
349	}
350
351	QString QWindowsLocalCodec::convertToUnicodeCharByChar(const char chars, int* length, ConverterState state) const*
352	{
353	if (!chars \|\| !length)
354	return QString();
355
356	int copyLocation = `0`;
357	int extra = `2`;
358	if (state && state->remainingChars) {
359	copyLocation = state->remainingChars;
360	extra += copyLocation;
361	}
362	int newLength = length + extra;
363	char mbcs = new* char[newLength];
364	//ensure that we have a NULL terminated string
365	mbcs[newLength-`1`] = `0`;
366	mbcs[newLength-`2`] = `0`;
367	memcpy(&(mbcs[copyLocation]), chars, length);
368	if (copyLocation) {
369	//copy the last character from the state
370	mbcs[`0`] = (char)state->state_data[`0`];
371	state->remainingChars = `0`;
372	}
373	const char *mb = mbcs;
374	#ifndef Q_OS_WINCE
375	const char *next = `0`;
376	QString s;
377	while((next = CharNextExA(CP_ACP, mb, `0`)) != mb) {
378	wchar_t wc[`2`] ={`0`};
379	int charlength = next - mb;
380	int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED\|MB_ERR_INVALID_CHARS, mb, charlength, wc, `2`);
381	if (len>`0`) {
382	s.append(QChar(wc[`0`]));
383	} else {
384	int r = GetLastError();
385	//check if the character being dropped is the last character
386	if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -`3`) && state) {
387	state->remainingChars = `1`;
388	state->state_data[`0`] = (char)*mb;
389	}
390	}
391	mb = next;
392	}
393	#else
394	QString s;
395	int size = mbstowcs(NULL, mb, length);
396	if (size < `0`) {
397	Q_ASSERT("Error in CE TextCodec");
398	return QString();
399	}
400	wchar_t* ws = new wchar_t[size + `2`];
401	ws[size +`1`] = `0`;
402	ws[size] = `0`;
403	size = mbstowcs(ws, mb, length);
404	for (int i=`0`; i< size; i++)
405	s.append(QChar(ws[i]));
406	delete [] ws;
407	#endif
408	delete [] mbcs;
409	return s;
410	}
411
412	QByteArray QWindowsLocalCodec::convertFromUnicode(const QChar ch, int* uclen, ConverterState ) const*
413	{
414	if (!ch)
415	return QByteArray();
416	if (uclen == `0`)
417	return QByteArray("");
418	BOOL used_def;
419	QByteArray mb(`4096`, `0`);
420	int len;
421	while (!(len=WideCharToMultiByte(CP_ACP, `0`, (const wchar_t*)ch, uclen,
422	mb.data(), mb.size()-`1`, `0`, &used_def)))
423	{
424	int r = GetLastError();
425	if (r == ERROR_INSUFFICIENT_BUFFER) {
426	mb.resize(`1`+WideCharToMultiByte(CP_ACP, `0`,
427	(const wchar_t*)ch, uclen,
428	`0`, `0`, `0`, &used_def));
429	// and try again...
430	} else {
431	#ifndef QT_NO_DEBUG
432	// Fail.
433	qWarning("WideCharToMultiByte: Cannot convert multibyte text (error %d): %s (UTF-8)",
434	r, QString(ch, uclen).toLocal8Bit().data());
435	#endif
436	break;
437	}
438	}
439	mb.resize(len);
440	return mb;
441	}
442
443
444	QByteArray QWindowsLocalCodec::name() const
445	{
446	return "System";
447	}
448
449	int QWindowsLocalCodec::mibEnum() const
450	{
451	return `0`;
452	}
453
454	#else
455
/* locale names mostly copied from XFree86 */
// Each table is a 0-terminated list of locale names; setupLocaleMapper()
// searches them with try_locale_list() to pick the codec for a locale.
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr",
    "hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro",
    "ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk",
    "sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 };

static const char * const iso8859_3locales[] = {
    "eo", 0 };

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE", 0 };

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU", 0 };

static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian", 0 };

static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 };

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic", 0 };

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek", 0 };

static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL", 0 };

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish", 0 };

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV", 0 };

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE",
    "es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR",
    "fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
    "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
    0 };

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian", 0 };

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai", 0 };

// static const char * const tcvnlocales[] = {
//     "vi", "vi_VN", 0 };
510
511	static bool try_locale_list(const char * const locale[], const QByteArray &lang)
512	{
513	int i;
514	for(i=`0`; locale[i] && lang != locale[i]; i++)
515	;
516	return locale[i] != `0`;
517	}
518
// For the probably_koi8_rlocales we have to look. The standard says
// these are 8859-5, but almost all Russian users use KOI8-R and
// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
// it thinks ru_RU means (see ru_RU_hack()).

// If you read the history, it seems that many Russians blame ISO and
// Perestroika for the confusion.
//
// The real bug is that some programs break if the user specifies
// ru_RU.KOI8-R.

static const char * const probably_koi8_rlocales[] = {
    "ru", "ru_SU", "ru_RU", "russian", 0 };
532
533	static QTextCodec * ru_RU_hack(const char * i) {
534	QTextCodec * ru_RU_codec = `0`;
535
536	#if !defined(QT_NO_SETLOCALE)
537	QByteArray origlocale(setlocale(LC_CTYPE, i));
538	#else
539	QByteArray origlocale(i);
540	#endif
541	// unicode koi8r latin5 name
542	// 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
543	// 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
544	int latin5 = tolower(`0xCE`);
545	int koi8r = tolower(`0xE0`);
546	if (koi8r == `0xC0` && latin5 != `0xEE`) {
547	ru_RU_codec = QTextCodec::codecForName("KOI8-R");
548	} else if (koi8r != `0xC0` && latin5 == `0xEE`) {
549	ru_RU_codec = QTextCodec::codecForName("ISO 8859-5");
550	} else {
551	// something else again... let's assume... throws dice
552	ru_RU_codec = QTextCodec::codecForName("KOI8-R");
553	qWarning("QTextCodec: Using KOI8-R, probe failed (%02x %02x %s)",
554	koi8r, latin5, i);
555	}
556	#if !defined(QT_NO_SETLOCALE)
557	setlocale(LC_CTYPE, origlocale);
558	#endif
559
560	return ru_RU_codec;
561	}
562
563	#endif
564
565	#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
566	static QTextCodec checkForCodec(const* QByteArray &name) {
567	QTextCodec *c = QTextCodec::codecForName(name);
568	if (!c) {
569	const int index = name.indexOf('@');
570	if (index != -`1`) {
571	c = QTextCodec::codecForName(name.left(index));
572	}
573	}
574	return c;
575	}
576	#endif
577
578	/ the next two functions are implicitely thread safe,*
579	as they are only called by setup() which uses a mutex.
580	*/
581	static void setupLocaleMapper()
582	{
583	#ifdef Q_OS_SYMBIAN
584	localeMapper = QSymbianTextCodec::localeMapper;
585	if (localeMapper)
586	return;
587	#endif
588
589	#if defined(Q_OS_WIN32) \|\| defined(Q_OS_WINCE)
590	localeMapper = QTextCodec::codecForName("System");
591	#else
592
593	#ifndef QT_NO_ICONV
594	localeMapper = QTextCodec::codecForName("System");
595	#endif
596
597	#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
598	if (!localeMapper) {
599	char *charset = nl_langinfo (CODESET);
600	if (charset)
601	localeMapper = QTextCodec::codecForName(charset);
602	}
603	#endif
604
605	if (!localeMapper) {
606	// Very poorly defined and followed standards causes lots of
607	// code to try to get all the cases... This logic is
608	// duplicated in QIconvCodec, so if you change it here, change
609	// it there too.
610
611	// Try to determine locale codeset from locale name assigned to
612	// LC_CTYPE category.
613
614	// First part is getting that locale name. First try setlocale() which
615	// definitely knows it, but since we cannot fully trust it, get ready
616	// to fall back to environment variables.
617	#if !defined(QT_NO_SETLOCALE)
618	const QByteArray ctype = setlocale(LC_CTYPE, `0`);
619	#else
620	const QByteArray ctype;
621	#endif
622
623	// Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
624	// environment variables.
625	QByteArray lang = qgetenv("LC_ALL");
626	if (lang.isEmpty() \|\| lang == "C") {
627	lang = qgetenv("LC_CTYPE");
628	}
629	if (lang.isEmpty() \|\| lang == "C") {
630	lang = qgetenv("LANG");
631	}
632
633	// Now try these in order:
634	// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
635	// 2. CODESET from lang if it contains a .CODESET part
636	// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
637	// 4. locale (ditto)
638	// 5. check for "@euro"
639	// 6. guess locale from ctype unless ctype is "C"
640	// 7. guess locale from lang
641
642	// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
643	int indexOfDot = ctype.indexOf('.');
644	if (indexOfDot != -`1`)
645	localeMapper = checkForCodec( ctype.mid(indexOfDot + `1`) );
646
647	// 2. CODESET from lang if it contains a .CODESET part
648	if (!localeMapper) {
649	indexOfDot = lang.indexOf('.');
650	if (indexOfDot != -`1`)
651	localeMapper = checkForCodec( lang.mid(indexOfDot + `1`) );
652	}
653
654	// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
655	if (!localeMapper && !ctype.isEmpty() && ctype != "C")
656	localeMapper = checkForCodec(ctype);
657
658	// 4. locale (ditto)
659	if (!localeMapper && !lang.isEmpty())
660	localeMapper = checkForCodec(lang);
661
662	// 5. "@euro"
663	if ((!localeMapper && ctype.contains("@euro")) \|\| lang.contains("@euro"))
664	localeMapper = checkForCodec("ISO 8859-15");
665
666	// 6. guess locale from ctype unless ctype is "C"
667	// 7. guess locale from lang
668	const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C") ? lang : ctype;
669
670	// Now do the guessing.
671	if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) {
672	if (try_locale_list(iso8859_15locales, lang))
673	localeMapper = QTextCodec::codecForName("ISO 8859-15");
674	else if (try_locale_list(iso8859_2locales, lang))
675	localeMapper = QTextCodec::codecForName("ISO 8859-2");
676	else if (try_locale_list(iso8859_3locales, lang))
677	localeMapper = QTextCodec::codecForName("ISO 8859-3");
678	else if (try_locale_list(iso8859_4locales, lang))
679	localeMapper = QTextCodec::codecForName("ISO 8859-4");
680	else if (try_locale_list(iso8859_5locales, lang))
681	localeMapper = QTextCodec::codecForName("ISO 8859-5");
682	else if (try_locale_list(iso8859_6locales, lang))
683	localeMapper = QTextCodec::codecForName("ISO 8859-6");
684	else if (try_locale_list(iso8859_7locales, lang))
685	localeMapper = QTextCodec::codecForName("ISO 8859-7");
686	else if (try_locale_list(iso8859_8locales, lang))
687	localeMapper = QTextCodec::codecForName("ISO 8859-8-I");
688	else if (try_locale_list(iso8859_9locales, lang))
689	localeMapper = QTextCodec::codecForName("ISO 8859-9");
690	else if (try_locale_list(iso8859_13locales, lang))
691	localeMapper = QTextCodec::codecForName("ISO 8859-13");
692	else if (try_locale_list(tis_620locales, lang))
693	localeMapper = QTextCodec::codecForName("ISO 8859-11");
694	else if (try_locale_list(koi8_ulocales, lang))
695	localeMapper = QTextCodec::codecForName("KOI8-U");
696	else if (try_locale_list(cp_1251locales, lang))
697	localeMapper = QTextCodec::codecForName("CP 1251");
698	else if (try_locale_list(pt_154locales, lang))
699	localeMapper = QTextCodec::codecForName("PT 154");
700	else if (try_locale_list(probably_koi8_rlocales, lang))
701	localeMapper = ru_RU_hack(lang);
702	}
703
704	}
705
706	// If everything failed, we default to 8859-1
707	// We could perhaps default to 8859-15.
708	if (!localeMapper)
709	localeMapper = QTextCodec::codecForName("ISO 8859-1");
710	#endif
711	}
712
713	#ifndef QT_NO_THREAD
// Declares textCodecsMutex(), the accessor for the global mutex that guards
// the codec list and cache. It is constructed as QMutex::Recursive.
Q_GLOBAL_STATIC_WITH_ARGS(QMutex, textCodecsMutex, (QMutex::Recursive));
715	#endif
716
// Creates the global codec list and instantiates every built-in codec, then
// selects the locale codec. Does nothing if the list already exists.
//
// textCodecsMutex needs to be locked to enter this function.
//
// The codecs are not stored anywhere here: each QTextCodec constructor
// prepends the new object to the list, so the order of the `new`
// expressions below is significant.
static void setup()
{
    if (all)
        return;

#ifdef Q_OS_SYMBIAN
    // If we don't have a trap handler, we're outside of the main() function,
    // ie. in global constructors or destructors. Don't create codecs in this
    // case as it would lead to crashes because of a missing cleanup stack on Symbian
    if (User::TrapHandler() == NULL)
        return;
#endif

#ifdef Q_DEBUG_TEXTCODEC
    if (destroying_is_ok)
        qWarning("QTextCodec: Creating new codec during codec cleanup");
#endif
    all = new QList<QTextCodec*>;
    // create the cleanup object to cleanup all codecs on exit
    (void) createQTextCodecCleanup();

#ifndef QT_NO_CODECS
    (void)new QTsciiCodec;
    for (int i = 0; i < 9; ++i)
        (void)new QIsciiCodec (i);

    for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
        (void)new QSimpleTextCodec (i);

#ifdef Q_OS_SYMBIAN
    localeMapper = QSymbianTextCodec::init();
#endif

#  if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
    // no font codecs when bootstrapping
    (void)new QFontLaoCodec;
#    if defined(QT_NO_ICONV)
    // no iconv(3) support, must build all codecs into the library
    (void)new QFontGb2312Codec;
    (void)new QFontGbkCodec;
    (void)new QFontGb18030_0Codec;
    (void)new QFontJis0208Codec;
    (void)new QFontJis0201Codec;
    (void)new QFontKsc5601Codec;
    (void)new QFontBig5hkscsCodec;
    (void)new QFontBig5Codec;
#    endif // QT_NO_ICONV
#  endif // Q_WS_X11 && !QT_BOOTSTRAPPED


#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
#  if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED) && !defined(QT_CODEC_PLUGINS)
    // no asian codecs when bootstrapping, sorry
    (void)new QGb18030Codec;
    (void)new QGbkCodec;
    (void)new QGb2312Codec;
    (void)new QEucJpCodec;
    (void)new QJisCodec;
    (void)new QSjisCodec;
    (void)new QEucKrCodec;
    (void)new QCP949Codec;
    (void)new QBig5Codec;
    (void)new QBig5hkscsCodec;
#  endif // QT_NO_ICONV && !QT_BOOTSTRAPPED && !QT_CODEC_PLUGINS
#endif // !Q_OS_SYMBIAN && !Q_OS_INTEGRITY
#endif // QT_NO_CODECS

#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
    (void) new QWindowsLocalCodec;
#endif // Q_OS_WIN32 || Q_OS_WINCE

    (void)new QUtf16Codec;
    (void)new QUtf16BECodec;
    (void)new QUtf16LECodec;
    (void)new QUtf32Codec;
    (void)new QUtf32BECodec;
    (void)new QUtf32LECodec;
#ifndef Q_OS_SYMBIAN
    (void)new QLatin15Codec;
#endif
    (void)new QLatin1Codec;
    (void)new QUtf8Codec;

#if !defined(Q_OS_SYMBIAN) && !defined(Q_OS_INTEGRITY)
#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
    // QIconvCodec depends on the UTF-16 codec, so it needs to be created last
    (void) new QIconvCodec ();
#endif
#endif

    // on Symbian the locale codec may already have been set above
    if (!localeMapper)
        setupLocaleMapper();
}
811
/*!
    \enum QTextCodec::ConversionFlag

    \value DefaultConversion  No flag is set.
    \value ConvertInvalidToNull  If this flag is set, each invalid input
                                 character is output as a null character.
    \value IgnoreHeader  Ignore any Unicode byte-order mark and don't generate any.

    \omitvalue FreeFunction
*/

/*!
    \fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)

    Constructs a ConverterState object initialized with the given \a flags.
*/
828
829	/!*
830	Destroys the ConverterState object.
831	*/
832	QTextCodec::ConverterState::~ConverterState()
833	{
834	if (flags & FreeFunction)
835	(QTextCodecUnalignedPointer::decode(state_data))(this);
836	else if (d)
837	qFree(d);
838	}
839
/*!
841	\class QTextCodec
842	\brief The QTextCodec class provides conversions between text encodings.
843	\reentrant
844	\ingroup i18n
845
846	Qt uses Unicode to store, draw and manipulate strings. In many
847	situations you may wish to deal with data that uses a different
848	encoding. For example, most Japanese documents are still stored
849	in Shift-JIS or ISO 2022-JP, while Russian users often have their
850	documents in KOI8-R or Windows-1251.
851
852	Qt provides a set of QTextCodec classes to help with converting
853	non-Unicode formats to and from Unicode. You can also create your
854	own codec classes.
855
856	The supported encodings are:
857
858	\list
859	\o Apple Roman
860	\o \l{Big5 Text Codec}{Big5}
861	\o \l{Big5-HKSCS Text Codec}{Big5-HKSCS}
862	\o CP949
863	\o \l{EUC-JP Text Codec}{EUC-JP}
864	\o \l{EUC-KR Text Codec}{EUC-KR}
865	\o \l{GBK Text Codec}{GB18030-0}
866	\o IBM 850
867	\o IBM 866
868	\o IBM 874
869	\o \l{ISO 2022-JP (JIS) Text Codec}{ISO 2022-JP}
870	\o ISO 8859-1 to 10
871	\o ISO 8859-13 to 16
872	\o Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml
873	\o JIS X 0201
874	\o JIS X 0208
875	\o KOI8-R
876	\o KOI8-U
877	\o MuleLao-1
878	\o ROMAN8
879	\o \l{Shift-JIS Text Codec}{Shift-JIS}
880	\o TIS-620
881	\o \l{TSCII Text Codec}{TSCII}
882	\o UTF-8
883	\o UTF-16
884	\o UTF-16BE
885	\o UTF-16LE
886	\o UTF-32
887	\o UTF-32BE
888	\o UTF-32LE
889	\o Windows-1250 to 1258
890	\o WINSAMI2
891	\endlist
892
893	QTextCodecs can be used as follows to convert some locally encoded
894	string to Unicode. Suppose you have some string encoded in Russian
895	KOI8-R encoding, and want to convert it to Unicode. The simple way
896	to do it is like this:
897
898	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 0
899
900	After this, \c string holds the text converted to Unicode.
901	Converting a string from Unicode to the local encoding is just as
902	easy:
903
904	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 1
905
906	To read or write files in various encodings, use QTextStream and
907	its \l{QTextStream::setCodec()}{setCodec()} function. See the
908	\l{tools/codecs}{Codecs} example for an application of QTextCodec
909	to file I/O.
910
911	Some care must be taken when trying to convert the data in chunks,
912	for example, when receiving it over a network. In such cases it is
913	possible that a multi-byte character will be split over two
914	chunks. At best this might result in the loss of a character and
915	at worst cause the entire conversion to fail.
916
917	The approach to use in these situations is to create a QTextDecoder
918	object for the codec and use this QTextDecoder for the whole
919	decoding process, as shown below:
920
921	\snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 2
922
923	The QTextDecoder object maintains state between chunks and therefore
924	works correctly even if a multi-byte character is split between
925	chunks.
926
927	\section1 Creating Your Own Codec Class
928
929	Support for new text encodings can be added to Qt by creating
930	QTextCodec subclasses.
931
932	The pure virtual functions describe the encoder to the system and
933	the coder is used as required in the different text file formats
934	supported by QTextStream, and under X11, for the locale-specific
935	character input and output.
936
937	To add support for another encoding to Qt, make a subclass of
938	QTextCodec and implement the functions listed in the table below.
939
940	\table
941	\header \o Function \o Description
942
943	\row \o name()
944	\o Returns the official name for the encoding. If the
945	encoding is listed in the
946	\l{IANA character-sets encoding file}, the name
947	should be the preferred MIME name for the encoding.
948
949	\row \o aliases()
950	\o Returns a list of alternative names for the encoding.
951	QTextCodec provides a default implementation that returns
952	an empty list. For example, "ISO-8859-1" has "latin1",
953	"CP819", "IBM819", and "iso-ir-100" as aliases.
954
955	\row \o mibEnum()
956	\o Return the MIB enum for the encoding if it is listed in
957	the \l{IANA character-sets encoding file}.
958
959	\row \o convertToUnicode()
960	\o Converts an 8-bit character string to Unicode.
961
962	\row \o convertFromUnicode()
963	\o Converts a Unicode string to an 8-bit character string.
964	\endtable
965
966	You may find it more convenient to make your codec class
967	available as a plugin; see \l{How to Create Qt Plugins} for
968	details.
969
970	\sa QTextStream, QTextDecoder, QTextEncoder, {Codecs Example}
971	*/
972
973	/!*
974	Constructs a QTextCodec, and gives it the highest precedence. The
975	QTextCodec should always be constructed on the heap (i.e. with \c
976	new). Qt takes ownership and will delete it when the application
977	terminates.
978	*/
979	QTextCodec::QTextCodec()
980	{
981	#ifndef QT_NO_THREAD
982	QMutexLocker locker(textCodecsMutex());
983	#endif
984	setup();
985	all->prepend(this);
986	}
987
988
989	/!*
990	\nonreentrant
991
992	Destroys the QTextCodec. Note that you should not delete codecs
993	yourself: once created they become Qt's responsibility.
994	*/
995	QTextCodec::~QTextCodec()
996	{
997	#ifdef Q_DEBUG_TEXTCODEC
998	if (!destroying_is_ok)
999	qWarning("QTextCodec::~QTextCodec: Called by application");
1000	#endif
1001	if (all) {
1002	#ifndef QT_NO_THREAD
1003	QMutexLocker locker(textCodecsMutex());
1004	#endif
1005	all->removeAll(this);
1006	QTextCodecCache *cache = qTextCodecCache();
1007	if (cache)
1008	cache->clear();
1009	}
1010	}
1011
/*!
    \fn QTextCodec *QTextCodec::codecForName(const char *name)

    Searches all installed QTextCodec objects and returns the one
    which best matches \a name; the match is case-insensitive. Returns
    0 if no codec matching the name \a name could be found.
*/
1019
1020	/!*
1021	Searches all installed QTextCodec objects and returns the one
1022	which best matches \a name; the match is case-insensitive. Returns
1023	0 if no codec matching the name \a name could be found.
1024	*/
1025	QTextCodec QTextCodec::codecForName(const* QByteArray &name)
1026	{
1027	if (name.isEmpty())
1028	return `0`;
1029
1030	#ifndef QT_NO_THREAD
1031	QMutexLocker locker(textCodecsMutex());
1032	#endif
1033	setup();
1034
1035	if (!validCodecs())
1036	return `0`;
1037
1038	QTextCodecCache *cache = qTextCodecCache();
1039	QTextCodec *codec;
1040	if (cache) {
1041	codec = cache->value(name);
1042	if (codec)
1043	return codec;
1044	}
1045
1046	for (int i = `0`; i < all->size(); ++i) {
1047	QTextCodec *cursor = all->at(i);
1048	if (nameMatch(cursor->name(), name)) {
1049	if (cache)
1050	cache->insert(name, cursor);
1051	return cursor;
1052	}
1053	QList<QByteArray> aliases = cursor->aliases();
1054	for (int y = `0`; y < aliases.size(); ++y)
1055	if (nameMatch(aliases.at(y), name)) {
1056	if (cache)
1057	cache->insert(name, cursor);
1058	return cursor;
1059	}
1060	}
1061
1062	codec = createForName(name);
1063	if (codec && cache)
1064	cache->insert(name, codec);
1065	return codec;
1066	}
1067
1068
1069	/!*
1070	Returns the QTextCodec which matches the \link
1071	QTextCodec::mibEnum() MIBenum\endlink \a mib.
1072	*/
1073	QTextCodec* QTextCodec::codecForMib(int mib)
1074	{
1075	#ifndef QT_NO_THREAD
1076	QMutexLocker locker(textCodecsMutex());
1077	#endif
1078	setup();
1079
1080	if (!validCodecs())
1081	return `0`;
1082
1083	QByteArray key = "MIB: " + QByteArray::number(mib);
1084	QTextCodecCache *cache = qTextCodecCache();
1085	QTextCodec *codec;
1086	if (cache) {
1087	codec = cache->value(key);
1088	if (codec)
1089	return codec;
1090	}
1091
1092	QList<QTextCodec*>::ConstIterator i;
1093	for (int i = `0`; i < all->size(); ++i) {
1094	QTextCodec *cursor = all->at(i);
1095	if (cursor->mibEnum() == mib) {
1096	if (cache)
1097	cache->insert(key, cursor);
1098	return cursor;
1099	}
1100	}
1101
1102	codec = createForMib(mib);
1103
1104	// Qt 3 used 1000 (mib for UCS2) as its identifier for the utf16 codec. Map
1105	// this correctly for compatibility.
1106	if (!codec && mib == `1000`)
1107	return codecForMib(`1015`);
1108
1109	if (codec && cache)
1110	cache->insert(key, codec);
1111	return codec;
1112	}
1113
1114	/!*
1115	Returns the list of all available codecs, by name. Call
1116	QTextCodec::codecForName() to obtain the QTextCodec for the name.
1117
1118	The list may contain many mentions of the same codec
1119	if the codec has aliases.
1120
1121	\sa availableMibs(), name(), aliases()
1122	*/
1123	QList<QByteArray> QTextCodec::availableCodecs()
1124	{
1125	#ifndef QT_NO_THREAD
1126	QMutexLocker locker(textCodecsMutex());
1127	#endif
1128	setup();
1129
1130	QList<QByteArray> codecs;
1131
1132	if (!validCodecs())
1133	return codecs;
1134
1135	for (int i = `0`; i < all->size(); ++i) {
1136	codecs += all->at(i)->name();
1137	codecs += all->at(i)->aliases();
1138	}
1139
1140	#ifndef QT_NO_THREAD
1141	locker.unlock();
1142	#endif
1143
1144	#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
1145	QFactoryLoader *l = loader();
1146	QStringList keys = l->keys();
1147	for (int i = `0`; i < keys.size(); ++i) {
1148	if (!keys.at(i).startsWith(QLatin1String ("MIB: "))) {
1149	QByteArray name = keys.at(i).toLatin1();
1150	if (!codecs.contains(name))
1151	codecs += name;
1152	}
1153	}
1154	#endif
1155
1156	return codecs;
1157	}
1158
1159	/!*
1160	Returns the list of MIBs for all available codecs. Call
1161	QTextCodec::codecForMib() to obtain the QTextCodec for the MIB.
1162
1163	\sa availableCodecs(), mibEnum()
1164	*/
1165	QList<int> QTextCodec::availableMibs()
1166	{
1167	#ifndef QT_NO_THREAD
1168	QMutexLocker locker(textCodecsMutex());
1169	#endif
1170	setup();
1171
1172	QList<int> codecs;
1173
1174	if (!validCodecs())
1175	return codecs;
1176
1177	for (int i = `0`; i < all->size(); ++i)
1178	codecs += all->at(i)->mibEnum();
1179
1180	#ifndef QT_NO_THREAD
1181	locker.unlock();
1182	#endif
1183
1184	#if !defined(QT_NO_LIBRARY) && !defined(QT_NO_TEXTCODECPLUGIN)
1185	QFactoryLoader *l = loader();
1186	QStringList keys = l->keys();
1187	for (int i = `0`; i < keys.size(); ++i) {
1188	if (keys.at(i).startsWith(QLatin1String ("MIB: "))) {
1189	int mib = keys.at(i).mid(`5`).toInt();
1190	if (!codecs.contains(mib))
1191	codecs += mib;
1192	}
1193	}
1194	#endif
1195
1196	return codecs;
1197	}
1198
1199	/!*
1200	Set the codec to \a c; this will be returned by
1201	codecForLocale(). If \a c is a null pointer, the codec is reset to
1202	the default.
1203
1204	This might be needed for some applications that want to use their
1205	own mechanism for setting the locale.
1206
1207	\sa codecForLocale()
1208	*/
1209	void QTextCodec::setCodecForLocale(QTextCodec *c)
1210	{
1211	#ifndef QT_NO_THREAD
1212	QMutexLocker locker(textCodecsMutex());
1213	#endif
1214	localeMapper = c;
1215	if (!localeMapper)
1216	setupLocaleMapper();
1217	}
1218
1219	/!*
1220	Returns a pointer to the codec most suitable for this locale.
1221
1222	On Windows, the codec will be based on a system locale. On Unix
1223	systems, starting with Qt 4.2, the codec will be using the \e
1224	iconv library. Note that in both cases the codec's name will be
1225	"System".
1226	*/
1227
1228	QTextCodec* QTextCodec::codecForLocale()
1229	{
1230	if (!validCodecs())
1231	return `0`;
1232
1233	if (localeMapper)
1234	return localeMapper;
1235
1236	#ifndef QT_NO_THREAD
1237	QMutexLocker locker(textCodecsMutex());
1238	#endif
1239	setup();
1240
1241	return localeMapper;
1242	}
1243
1244
/*!
    \fn QByteArray QTextCodec::name() const

    QTextCodec subclasses must reimplement this function. It returns
    the name of the encoding supported by the subclass.

    If the codec is registered as a character set in the
    \l{IANA character-sets encoding file} this method should
    return the preferred MIME name for the codec if defined,
    otherwise its name.
*/
1256
/*!
    \fn int QTextCodec::mibEnum() const

    Subclasses of QTextCodec must reimplement this function. It
    returns the MIBenum (see \l{IANA character-sets encoding file}
    for more information). It is important that each QTextCodec
    subclass returns the correct unique value for this function.
*/
1265
1266	/!*
1267	Subclasses can return a number of aliases for the codec in question.
1268
1269	Standard aliases for codecs can be found in the
1270	\l{IANA character-sets encoding file}.
1271	*/
1272	QList<QByteArray> QTextCodec::aliases() const
1273	{
1274	return QList<QByteArray>();
1275	}
1276
/*!
    \fn QString QTextCodec::convertToUnicode(const char *chars, int len,
                                             ConverterState *state) const

    QTextCodec subclasses must reimplement this function.

    Converts the first \a len characters of \a chars from the
    encoding of the subclass to Unicode, and returns the result in a
    QString.

    \a state can be 0, in which case the conversion is stateless and
    default conversion rules should be used. If state is not 0, the
    codec should save the state after the conversion in \a state, and
    adjust the remainingChars and invalidChars members of the struct.
*/
1292
/*!
    \fn QByteArray QTextCodec::convertFromUnicode(const QChar *input, int number,
                                                  ConverterState *state) const

    QTextCodec subclasses must reimplement this function.

    Converts the first \a number of characters from the \a input array
    from Unicode to the encoding of the subclass, and returns the result
    in a QByteArray.

    \a state can be 0, in which case the conversion is stateless and
    default conversion rules should be used. If state is not 0, the
    codec should save the state after the conversion in \a state, and
    adjust the remainingChars and invalidChars members of the struct.
*/
1308
1309	/!*
1310	Creates a QTextDecoder which stores enough state to decode chunks
1311	of \c{char } data to create chunks of Unicode data.*
1312
1313	The caller is responsible for deleting the returned object.
1314	*/
1315	QTextDecoder* QTextCodec::makeDecoder() const
1316	{
1317	return new QTextDecoder (this);
1318	}
1319
1320	/!*
1321	Creates a QTextDecoder with a specified \a flags to decode chunks
1322	of \c{char } data to create chunks of Unicode data.*
1323
1324	The caller is responsible for deleting the returned object.
1325
1326	\since 4.7
1327	*/
1328	QTextDecoder* QTextCodec::makeDecoder(QTextCodec::ConversionFlags flags) const
1329	{
1330	return new QTextDecoder (this, flags);
1331	}
1332
1333
1334	/!*
1335	Creates a QTextEncoder which stores enough state to encode chunks
1336	of Unicode data as \c{char } data.*
1337
1338	The caller is responsible for deleting the returned object.
1339	*/
1340	QTextEncoder* QTextCodec::makeEncoder() const
1341	{
1342	return new QTextEncoder (this);
1343	}
1344
1345	/!*
1346	Creates a QTextEncoder with a specified \a flags to encode chunks
1347	of Unicode data as \c{char } data.*
1348
1349	The caller is responsible for deleting the returned object.
1350
1351	\since 4.7
1352	*/
1353	QTextEncoder* QTextCodec::makeEncoder(QTextCodec::ConversionFlags flags) const
1354	{
1355	return new QTextEncoder (this, flags);
1356	}
1357
/*!
    \fn QByteArray QTextCodec::fromUnicode(const QChar *input, int number,
                                           ConverterState *state) const

    Converts the first \a number of characters from the \a input array
    from Unicode to the encoding of this codec, and returns the result
    in a QByteArray.

    The \a state of the converter used is updated.
*/
1368
1369	/!*
1370	Converts \a str from Unicode to the encoding of this codec, and
1371	returns the result in a QByteArray.
1372	*/
1373	QByteArray QTextCodec::fromUnicode(const QString& str) const
1374	{
1375	return convertFromUnicode(str.constData(), str.length(), `0`);
1376	}
1377
/*!
    \fn QString QTextCodec::toUnicode(const char *input, int size,
                                      ConverterState *state) const

    Converts the first \a size characters from the \a input from the
    encoding of this codec to Unicode, and returns the result in a
    QString.

    The \a state of the converter used is updated.
*/
1388
1389	/!*
1390	Converts \a a from the encoding of this codec to Unicode, and
1391	returns the result in a QString.
1392	*/
1393	QString QTextCodec::toUnicode(const QByteArray& a) const
1394	{
1395	return convertToUnicode(a.constData(), a.length(), `0`);
1396	}
1397
1398	/!*
1399	Returns true if the Unicode character \a ch can be fully encoded
1400	with this codec; otherwise returns false.
1401	*/
1402	bool QTextCodec::canEncode(QChar ch) const
1403	{
1404	ConverterState state;
1405	state.flags = ConvertInvalidToNull;
1406	convertFromUnicode(&ch, `1`, &state);
1407	return (state.invalidChars == `0`);
1408	}
1409
1410	/!*
1411	\overload
1412
1413	\a s contains the string being tested for encode-ability.
1414	*/
1415	bool QTextCodec::canEncode(const QString& s) const
1416	{
1417	ConverterState state;
1418	state.flags = ConvertInvalidToNull;
1419	convertFromUnicode(s.constData(), s.length(), &state);
1420	return (state.invalidChars == `0`);
1421	}
1422
1423	#ifdef QT3_SUPPORT
1424	/!*
1425	Returns a string representing the current language and
1426	sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1427
1428	\sa QLocale
1429	*/
1430	const char *QTextCodec::locale()
1431	{
1432	static char locale[`6`];
1433	QByteArray l = QLocale::system().name().toLatin1();
1434	int len = qMin(l.length(), `5`);
1435	memcpy(locale, l.constData(), len);
1436	locale[len] = '\0';
1437
1438	return locale;
1439	}
1440
1441	/!*
1442	\overload
1443	*/
1444
1445	QByteArray QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
1446	{
1447	QByteArray result = convertFromUnicode(uc.constData(), lenInOut, `0`);
1448	lenInOut = result.length();
1449	return result;
1450	}
1451
1452	/!*
1453	\overload
1454
1455	\a a contains the source characters; \a len contains the number of
1456	characters in \a a to use.
1457	*/
1458	QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1459	{
1460	len = qMin(a.size(), len);
1461	return convertToUnicode(a.constData(), len, `0`);
1462	}
1463	#endif
1464
1465	/!*
1466	\overload
1467
1468	\a chars contains the source characters.
1469	*/
1470	QString QTextCodec::toUnicode(const char chars) const*
1471	{
1472	int len = qstrlen(chars);
1473	return convertToUnicode(chars, len, `0`);
1474	}
1475
1476
/*!
    \class QTextEncoder
    \brief The QTextEncoder class provides a state-based encoder.
    \reentrant
    \ingroup i18n

    A text encoder converts text from Unicode into an encoded text format
    using a specific codec.

    The encoder converts Unicode into another format, remembering any
    state that is required between calls.

    \sa QTextCodec::makeEncoder(), QTextDecoder
*/
1491
/*!
    \fn QTextEncoder::QTextEncoder(const QTextCodec *codec)

    Constructs a text encoder for the given \a codec.
*/
1497
1498	/!*
1499	Constructs a text encoder for the given \a codec and conversion \a flags.
1500
1501	\since 4.7
1502	*/
1503	QTextEncoder::QTextEncoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
1504	: c(codec), state ()
1505	{
1506	state.flags = flags;
1507	}
1508
1509	/!*
1510	Destroys the encoder.
1511	*/
1512	QTextEncoder::~QTextEncoder()
1513	{
1514	}
1515
1516	/! \internal*
1517	\since 4.5
1518	Determines whether the eecoder encountered a failure while decoding the input. If
1519	an error was encountered, the produced result is undefined, and gets converted as according
1520	to the conversion flags.
1521	*/
1522	bool QTextEncoder::hasFailure() const
1523	{
1524	return state.invalidChars != `0`;
1525	}
1526
1527	/!*
1528	Converts the Unicode string \a str into an encoded QByteArray.
1529	*/
1530	QByteArray QTextEncoder::fromUnicode(const QString& str)
1531	{
1532	QByteArray result = c->fromUnicode(str.constData(), str.length(), &state);
1533	return result;
1534	}
1535
1536	/!*
1537	\overload
1538
1539	Converts \a len characters (not bytes) from \a uc, and returns the
1540	result in a QByteArray.
1541	*/
1542	QByteArray QTextEncoder::fromUnicode(const QChar uc, int* len)
1543	{
1544	QByteArray result = c->fromUnicode(uc, len, &state);
1545	return result;
1546	}
1547
1548	#ifdef QT3_SUPPORT
1549	/!*
1550	\overload
1551
1552	Converts \a lenInOut characters (not bytes) from \a uc, and returns the
1553	result in a QByteArray. The number of characters read is returned in
1554	the \a lenInOut parameter.
1555	*/
1556	QByteArray QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1557	{
1558	QByteArray result = c->fromUnicode(uc.constData(), lenInOut, &state);
1559	lenInOut = result.length();
1560	return result;
1561	}
1562	#endif
1563
/*!
    \class QTextDecoder
    \brief The QTextDecoder class provides a state-based decoder.
    \reentrant
    \ingroup i18n

    A text decoder converts text from an encoded text format into Unicode
    using a specific codec.

    The decoder converts text in this format into Unicode, remembering any
    state that is required between calls.

    \sa QTextCodec::makeDecoder(), QTextEncoder
*/
1578
/*!
    \fn QTextDecoder::QTextDecoder(const QTextCodec *codec)

    Constructs a text decoder for the given \a codec.
*/
1584
1585	/!*
1586	Constructs a text decoder for the given \a codec and conversion \a flags.
1587
1588	\since 4.7
1589	*/
1590
1591	QTextDecoder::QTextDecoder(const QTextCodec *codec, QTextCodec::ConversionFlags flags)
1592	: c(codec), state ()
1593	{
1594	state.flags = flags;
1595	}
1596
1597	/!*
1598	Destroys the decoder.
1599	*/
1600	QTextDecoder::~QTextDecoder()
1601	{
1602	}
1603
1604	/!*
1605	\fn QString QTextDecoder::toUnicode(const char chars, int len)*
1606
1607	Converts the first \a len bytes in \a chars to Unicode, returning
1608	the result.
1609
1610	If not all characters are used (e.g. if only part of a multi-byte
1611	encoding is at the end of the characters), the decoder remembers
1612	enough state to continue with the next call to this function.
1613	*/
1614	QString QTextDecoder::toUnicode(const char chars, int* len)
1615	{
1616	return c->toUnicode(chars, len, &state);
1617	}
1618
1619
1620	/! \overload*
1621
1622	The converted string is returned in \a target.
1623	*/
1624	void QTextDecoder::toUnicode(QString target, const* char chars, int* len)
1625	{
1626	Q_ASSERT(target);
1627	switch (c->mibEnum()) {
1628	case `106`: // utf8
1629	static_cast<const QUtf8Codec*>(c)->convertToUnicode(target, chars, len, &state);
1630	break;
1631	case `4`: { // latin1
1632	target->resize(len);
1633	ushort data = (ushort)target->data();
1634	for (int i = len; i >=`0`; --i)
1635	data[i] = (uchar) chars[i];
1636	} break;
1637	default:
1638	*target = c->toUnicode(chars, len, &state);
1639	}
1640	}
1641
1642
1643	/!*
1644	\overload
1645
1646	Converts the bytes in the byte array specified by \a ba to Unicode
1647	and returns the result.
1648	*/
1649	QString QTextDecoder::toUnicode(const QByteArray &ba)
1650	{
1651	return c->toUnicode(ba.constData(), ba.length(), &state);
1652	}
1653
1654
/*!
    \fn QTextCodec* QTextCodec::codecForTr()

    Returns the codec used by QObject::tr() on its argument. If this
    function returns 0 (the default), tr() assumes Latin-1.

    \sa setCodecForTr()
*/
1663
/*!
    \fn void QTextCodec::setCodecForTr(QTextCodec *c)
    \nonreentrant

    Sets the codec used by QObject::tr() on its argument to \a c. If
    \a c is 0 (the default), tr() assumes Latin-1.

    If the literal quoted text in the program is not in the Latin-1
    encoding, this function can be used to set the appropriate
    encoding. For example, software developed by Korean programmers
    might use eucKR for all the text in the program, in which case the
    main() function might look like this:

    \snippet doc/src/snippets/code/src_corelib_codecs_qtextcodec.cpp 3

    Note that this is not the way to select the encoding that the \e
    user has chosen. For example, to convert an application containing
    literal English strings to Korean, all that is needed is for the
    English strings to be passed through tr() and for translation
    files to be loaded. For details of internationalization, see
    \l{Internationalization with Qt}.

    \sa codecForTr(), setCodecForCStrings()
*/
1688
1689
/*!
    \fn QTextCodec* QTextCodec::codecForCStrings()

    Returns the codec used by QString to convert to and from \c{const
    char *} and QByteArrays. If this function returns 0 (the default),
    QString assumes Latin-1.

    \sa setCodecForCStrings()
*/
1699
/*!
    \fn void QTextCodec::setCodecForCStrings(QTextCodec *codec)
    \nonreentrant

    Sets the codec used by QString to convert to and from \c{const
    char *} and QByteArrays. If the \a codec is 0 (the default),
    QString assumes Latin-1.

    \warning Some codecs do not preserve the characters in the ASCII
    range (0x00 to 0x7F). For example, the Japanese Shift-JIS
    encoding maps the backslash character (0x5C) to the Yen
    character. To avoid undesirable side-effects, we recommend
    avoiding such codecs with setCodecForCStrings().

    \sa codecForCStrings(), setCodecForTr()
*/
1716
1717	/!*
1718	\since 4.4
1719
1720	Tries to detect the encoding of the provided snippet of HTML in
1721	the given byte array, \a ba, by checking the BOM (Byte Order Mark)
1722	and the content-type meta header and returns a QTextCodec instance
1723	that is capable of decoding the html to unicode. If the codec
1724	cannot be detected from the content provided, \a defaultCodec is
1725	returned.
1726
1727	\sa codecForUtfText()
1728	*/
1729	QTextCodec QTextCodec::codecForHtml(const* QByteArray &ba, QTextCodec *defaultCodec)
1730	{
1731	// determine charset
1732	int pos;
1733	QTextCodec *c = `0`;
1734
1735	c = QTextCodec::codecForUtfText(ba, c);
1736	if (!c) {
1737	QByteArray header = ba.left(`512`).toLower();
1738	if ((pos = header.indexOf("http-equiv=")) != -`1`) {
1739	if ((pos = header.lastIndexOf("meta ", pos)) != -`1`) {
1740	pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1741	if (pos != -`1`) {
1742	int pos2 = header.indexOf('\"', pos+`1`);
1743	QByteArray cs = header.mid(pos, pos2-pos);
1744	// qDebug("found charset: %s", cs.data());
1745	c = QTextCodec::codecForName(cs);
1746	}
1747	}
1748	}
1749	}
1750	if (!c)
1751	c = defaultCodec;
1752
1753	return c;
1754	}
1755
1756	/!*
1757	\overload
1758
1759	Tries to detect the encoding of the provided snippet of HTML in
1760	the given byte array, \a ba, by checking the BOM (Byte Order Mark)
1761	and the content-type meta header and returns a QTextCodec instance
1762	that is capable of decoding the html to unicode. If the codec cannot
1763	be detected, this overload returns a Latin-1 QTextCodec.
1764	*/
1765	QTextCodec QTextCodec::codecForHtml(const* QByteArray &ba)
1766	{
1767	return codecForHtml(ba, QTextCodec::codecForMib(/Latin 1/ `4`));
1768	}
1769
1770	/!*
1771	\since 4.6
1772
1773	Tries to detect the encoding of the provided snippet \a ba by
1774	using the BOM (Byte Order Mark) and returns a QTextCodec instance
1775	that is capable of decoding the text to unicode. If the codec
1776	cannot be detected from the content provided, \a defaultCodec is
1777	returned.
1778
1779	\sa codecForHtml()
1780	*/
1781	QTextCodec QTextCodec::codecForUtfText(const* QByteArray &ba, QTextCodec *defaultCodec)
1782	{
1783	const int arraySize = ba.size();
1784
1785	if (arraySize > `3`) {
1786	if ((uchar)ba [`0`] == `0x00`
1787	&& (uchar)ba [`1`] == `0x00`
1788	&& (uchar)ba [`2`] == `0xFE`
1789	&& (uchar)ba [`3`] == `0xFF`)
1790	return QTextCodec::codecForMib(`1018`); // utf-32 be
1791	else if ((uchar)ba [`0`] == `0xFF`
1792	&& (uchar)ba [`1`] == `0xFE`
1793	&& (uchar)ba [`2`] == `0x00`
1794	&& (uchar)ba [`3`] == `0x00`)
1795	return QTextCodec::codecForMib(`1019`); // utf-32 le
1796	}
1797
1798	if (arraySize < `2`)
1799	return defaultCodec;
1800	if ((uchar)ba [`0`] == `0xfe` && (uchar)ba [`1`] == `0xff`)
1801	return QTextCodec::codecForMib(`1013`); // utf16 be
1802	else if ((uchar)ba [`0`] == `0xff` && (uchar)ba [`1`] == `0xfe`)
1803	return QTextCodec::codecForMib(`1014`); // utf16 le
1804
1805	if (arraySize < `3`)
1806	return defaultCodec;
1807	if ((uchar)ba [`0`] == `0xef`
1808	&& (uchar)ba [`1`] == `0xbb`
1809	&& (uchar)ba [`2`] == `0xbf`)
1810	return QTextCodec::codecForMib(`106`); // utf-8
1811
1812	return defaultCodec;
1813	}
1814
1815	/!*
1816	\overload
1817
1818	Tries to detect the encoding of the provided snippet \a ba by
1819	using the BOM (Byte Order Mark) and returns a QTextCodec instance
1820	that is capable of decoding the text to unicode. If the codec
1821	cannot be detected, this overload returns a Latin-1 QTextCodec.
1822
1823	\sa codecForHtml()
1824	*/
1825	QTextCodec QTextCodec::codecForUtfText(const* QByteArray &ba)
1826	{
1827	return codecForUtfText(ba, QTextCodec::codecForMib(/Latin 1/ `4`));
1828	}
1829
1830
1831	/! \internal*
1832	\since 4.3
1833	Determines whether the decoder encountered a failure while decoding the input. If
1834	an error was encountered, the produced result is undefined, and gets converted as according
1835	to the conversion flags.
1836	*/
1837	bool QTextDecoder::hasFailure() const
1838	{
1839	return state.invalidChars != `0`;
1840	}
1841
/*!
    \fn QTextCodec *QTextCodec::codecForContent(const char *str, int size)

    This functionality is no longer provided by Qt. This
    compatibility function always returns a null pointer.
*/
1848
/*!
    \fn QTextCodec *QTextCodec::codecForName(const char *hint, int accuracy)

    Use the codecForName(const QByteArray &) overload instead.
*/
1854
/*!
    \fn QTextCodec* QTextCodec::codecForIndex(int i)

    Use availableCodecs() or availableMibs() instead and iterate
    through the resulting list.
*/
1861
1862
/*!
    \fn QByteArray QTextCodec::mimeName() const

    Use name() instead.
*/
1868
1869	QT_END_NAMESPACE
1870
1871	#endif // QT_NO_TEXTCODEC
1872