rfccodecs.cpp [kdepimlibs/kimap/rfccodecs.cpp]

1	/**********************************************************************
2	*
3	* rfccodecs.cpp - handler for various rfc/mime encodings
4	* Copyright (C) 2000 s.carstens@gmx.de
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*********************************************************************/
22	/**
23	* @file
24	* This file is part of the IMAP support library and defines the
25	* RfcCodecs class.
26	*
27	* @brief
28	* Defines the RfcCodecs class.
29	*
30	* @author Sven Carstens
31	*/
32
33	#include "rfccodecs.h"
34
35	#include <ctype.h>
36	#include <sys/types.h>
37
38	#include <stdio.h>
39	#include <stdlib.h>
40
41	#include <QtCore/QTextCodec>
42	#include <QtCore/QBuffer>
43	#include <QtCore/QRegExp>
44	#include <QtCore/QByteArray>
45	#include <QtCore/QLatin1Char>
46	#include <kcodecs.h>
47
48	using namespace KIMAP;
49
50	// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
51	// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
52
//@cond PRIVATE
/* Alphabet of the modified base64 used for IMAP folder names; it ends in "+,". */
static const unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/* Marker stored in the base64 decoding table for bytes outside the alphabet. */
#define UNDEFINED 64
/* Not referenced by the code visible in this file. */
#define MAXLINE 76
/* Characters the RFC 2047 / RFC 2231 encoders below always hex-escape
   (16 characters plus the terminating NUL). */
static const char especials[17] = "()<>@,;:\"/[]?.= ";

/* UTF16 definitions */
#define UTF16MASK 0x03FFUL        /* low 10 payload bits of a surrogate */
#define UTF16SHIFT 10             /* payload bits carried by each surrogate */
#define UTF16BASE 0x10000UL       /* first code point that needs a surrogate pair */
#define UTF16HIGHSTART 0xD800UL   /* high (leading) surrogate range */
#define UTF16HIGHEND 0xDBFFUL
#define UTF16LOSTART 0xDC00UL     /* low (trailing) surrogate range */
#define UTF16LOEND 0xDFFFUL
//@endcond
69
70	//-----------------------------------------------------------------------------
71	QByteArray KIMAP::decodeImapFolderName( const QByteArray &inSrc )
72	{
73	unsigned char c, i, bitcount;
74	unsigned long ucs4, utf16, bitbuf;
75	unsigned char base64[`256`], utf8[`6`];
76	unsigned int srcPtr = `0`;
77	QByteArray dst;
78	QByteArray src = inSrc;
79	uint srcLen = inSrc.length();
80
81	/ initialize modified base64 decoding table /
82	memset( base64, UNDEFINED, sizeof( base64 ) );
83	for ( i = `0`; i < sizeof( base64chars ); ++i ) {
84	base64[(int)base64chars[i]] = i;
85	}
86
87	/ loop until end of string /
88	while ( srcPtr < srcLen ) {
89	c = src [srcPtr++];
90	/ deal with literal characters and &- /
91	if ( c != '&' \|\| src [srcPtr] == '-' ) {
92	/ encode literally /
93	dst += c;
94	/ skip over the '-' if this is an &- sequence /
95	if ( c == '&' ) {
96	srcPtr++;
97	}
98	} else {
99	/ convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX /
100	bitbuf = `0`;
101	bitcount = `0`;
102	ucs4 = `0`;
103	while ( ( c = base64[(unsigned char)src [srcPtr]] ) != UNDEFINED ) {
104	++srcPtr;
105	bitbuf = ( bitbuf << `6` ) \| c;
106	bitcount += `6`;
107	/ enough bits for a UTF-16 character? /
108	if ( bitcount >= `16` ) {
109	bitcount -= `16`;
110	utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & `0xffff`;
111	/ convert UTF16 to UCS4 /
112	if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
113	ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
114	continue;
115	} else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
116	ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
117	} else {
118	ucs4 = utf16;
119	}
120	/ convert UTF-16 range of UCS4 to UTF-8 /
121	if ( ucs4 <= `0x7fUL` ) {
122	utf8[`0`] = ucs4;
123	i = `1`;
124	} else if ( ucs4 <= `0x7ffUL` ) {
125	utf8[`0`] = `0xc0` \| ( ucs4 >> `6` );
126	utf8[`1`] = `0x80` \| ( ucs4 & `0x3f` );
127	i = `2`;
128	} else if ( ucs4 <= `0xffffUL` ) {
129	utf8[`0`] = `0xe0` \| ( ucs4 >> `12` );
130	utf8[`1`] = `0x80` \| ( ( ucs4 >> `6` ) & `0x3f` );
131	utf8[`2`] = `0x80` \| ( ucs4 & `0x3f` );
132	i = `3`;
133	} else {
134	utf8[`0`] = `0xf0` \| ( ucs4 >> `18` );
135	utf8[`1`] = `0x80` \| ( ( ucs4 >> `12` ) & `0x3f` );
136	utf8[`2`] = `0x80` \| ( ( ucs4 >> `6` ) & `0x3f` );
137	utf8[`3`] = `0x80` \| ( ucs4 & `0x3f` );
138	i = `4`;
139	}
140	/ copy it /
141	for ( c = `0`; c < i; ++c ) {
142	dst += utf8[c];
143	}
144	}
145	}
146	/ skip over trailing '-' in modified UTF-7 encoding /
147	if ( src [srcPtr] == '-' ) {
148	++srcPtr;
149	}
150	}
151	}
152	return dst;
153	}
154
155	QString KIMAP::decodeImapFolderName( const QString &inSrc )
156	{
157	return QString::fromUtf8( decodeImapFolderName( inSrc.toUtf8() ).data() );
158	}
159
160	//-----------------------------------------------------------------------------
161
162	QByteArray KIMAP::quoteIMAP( const QByteArray &src )
163	{
164	uint len = src.length();
165	QByteArray result;
166	result.reserve( `2` * len );
167	for ( unsigned int i = `0`; i < len; i++ ) {
168	if ( src [i] == '"' \|\| src [i] == '\\' ) {
169	result += '\\';
170	}
171	result += src [i];
172	}
173	result.squeeze();
174	return result;
175	}
176
177	QString KIMAP::quoteIMAP( const QString &src )
178	{
179	uint len = src.length();
180	QString result;
181	result.reserve( `2` * len );
182	for ( unsigned int i = `0`; i < len; i++ ) {
183	if ( src [i] == QLatin1Char ('"') \|\| src [i] == QLatin1Char ('\\') ) {
184	result += QLatin1Char ('\\');
185	}
186	result += src [i];
187	}
188	//result.squeeze(); - unnecessary and slow
189	return result;
190	}
191
192	//-----------------------------------------------------------------------------
193	QString KIMAP::encodeImapFolderName( const QString &inSrc )
194	{
195	return QString::fromUtf8( encodeImapFolderName( inSrc.toUtf8() ).data() );
196	}
197
198	QByteArray KIMAP::encodeImapFolderName( const QByteArray &inSrc )
199	{
200	unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
201	unsigned int ucs4, bitbuf;
202	QByteArray src = inSrc;
203	QByteArray dst;
204
205	int srcPtr = `0`;
206	utf7mode = `0`;
207	utf8total = `0`;
208	bitstogo = `0`;
209	utf8pos = `0`;
210	bitbuf = `0`;
211	ucs4 = `0`;
212	while ( srcPtr < src.length () ) {
213	c = (unsigned char)src [srcPtr++];
214	/ normal character? /
215	if ( c >= ' ' && c <= '~' ) {
216	/ switch out of UTF-7 mode /
217	if ( utf7mode ) {
218	if ( bitstogo ) {
219	dst += base64chars[( bitbuf << ( `6` - bitstogo ) ) & `0x3F`];
220	bitstogo = `0`;
221	}
222	dst += '-';
223	utf7mode = `0`;
224	}
225	dst += c;
226	/ encode '&' as '&-' /
227	if ( c == '&' ) {
228	dst += '-';
229	}
230	continue;
231	}
232	/ switch to UTF-7 mode /
233	if ( !utf7mode ) {
234	dst += '&';
235	utf7mode = `1`;
236	}
237	/ Encode US-ASCII characters as themselves /
238	if ( c < `0x80` ) {
239	ucs4 = c;
240	utf8total = `1`;
241	} else if ( utf8total ) {
242	/ save UTF8 bits into UCS4 /
243	ucs4 = ( ucs4 << `6` ) \| ( c & `0x3FUL` );
244	if ( ++utf8pos < utf8total ) {
245	continue;
246	}
247	} else {
248	utf8pos = `1`;
249	if ( c < `0xE0` ) {
250	utf8total = `2`;
251	ucs4 = c & `0x1F`;
252	} else if ( c < `0xF0` ) {
253	utf8total = `3`;
254	ucs4 = c & `0x0F`;
255	} else {
256	/ NOTE: can't convert UTF8 sequences longer than 4 /
257	utf8total = `4`;
258	ucs4 = c & `0x03`;
259	}
260	continue;
261	}
262	/ loop to split ucs4 into two utf16 chars if necessary /
263	utf8total = `0`;
264	do
265	{
266	if ( ucs4 >= UTF16BASE ) {
267	ucs4 -= UTF16BASE;
268	bitbuf =
269	( bitbuf << `16` ) \| ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
270	ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
271	utf16flag = `1`;
272	} else {
273	bitbuf = ( bitbuf << `16` ) \| ucs4;
274	utf16flag = `0`;
275	}
276	bitstogo += `16`;
277	/ spew out base64 /
278	while ( bitstogo >= `6` ) {
279	bitstogo -= `6`;
280	dst +=
281	base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & `0x3F`];
282	}
283	}
284	while ( utf16flag );
285	}
286	/ if in UTF-7 mode, finish in ASCII /
287	if ( utf7mode ) {
288	if ( bitstogo ) {
289	dst += base64chars[( bitbuf << ( `6` - bitstogo ) ) & `0x3F`];
290	}
291	dst += '-';
292	}
293	return quoteIMAP( dst );
294	}
295
296	//-----------------------------------------------------------------------------
297	QTextCodec KIMAP::codecForName( const* QString &str )
298	{
299	if ( str.isEmpty () ) {
300	return `0`;
301	}
302	return QTextCodec::codecForName ( str.toLower ().
303	replace ( QLatin1String ("windows"), QLatin1String ("cp") ).toLatin1 () );
304	}
305
306	//-----------------------------------------------------------------------------
307	const QString KIMAP::decodeRFC2047String( const QString &str )
308	{
309	QString throw_away;
310
311	return decodeRFC2047String( str, throw_away );
312	}
313
314	//-----------------------------------------------------------------------------
315	const QString KIMAP::decodeRFC2047String( const QString &str,
316	QString &charset )
317	{
318	QString throw_away;
319
320	return decodeRFC2047String( str, charset, throw_away );
321	}
322
323	//-----------------------------------------------------------------------------
324	const QString KIMAP::decodeRFC2047String( const QString &str,
325	QString &charset,
326	QString &language )
327	{
328	//do we have a rfc string
329	if ( !str.contains( QLatin1String ("=?") ) ) {
330	return str;
331	}
332
333	// FIXME get rid of the conversion?
334	QByteArray aStr = str.toLatin1 (); // QString.length() means Unicode chars
335	QByteArray result;
336	char pos, beg, end, mid = `0`;
337	QByteArray cstr;
338	char encoding = `0`, ch;
339	bool valid;
340	const int maxLen = `200`;
341	int i;
342
343	// result.truncate(aStr.length());
344	for ( pos = aStr.data (); *pos; pos++ ) {
345	if ( pos[`0`] != '=' \|\| pos[`1`] != '?' ) {
346	result += *pos;
347	continue;
348	}
349	beg = pos + `2`;
350	end = beg;
351	valid = true;
352	// parse charset name
353	for ( i = `2`, pos += `2`;
354	i < maxLen &&
355	( pos != '?' && ( ispunct( pos ) \|\| isalnum ( *pos ) ) );
356	i++ ) {
357	pos++;
358	}
359	if ( *pos != '?' \|\| i < `4` \|\| i >= maxLen ) {
360	valid = false;
361	} else {
362	charset = QLatin1String (QByteArray ( beg, i - `1` )); // -2 + 1 for the zero
363	int pt = charset.lastIndexOf( QLatin1Char ('*') );
364	if ( pt != -`1` ) {
365	// save language for later usage
366	language = charset.right( charset.length () - pt - `1` );
367
368	// tie off language as defined in rfc2047
369	charset.truncate( pt );
370	}
371	// get encoding and check delimiting question marks
372	encoding = toupper( pos[`1`] );
373	if ( pos[`2`] != '?' \|\|
374	( encoding != 'Q' && encoding != 'B' &&
375	encoding != 'q' && encoding != 'b' ) ) {
376	valid = false;
377	}
378	pos += `3`;
379	i += `3`;
380	// kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
381	}
382	if ( valid ) {
383	mid = pos;
384	// search for end of encoded part
385	while ( i < maxLen && pos && !( pos == '?' && *( pos + `1` ) == '=' ) ) {
386	i++;
387	pos++;
388	}
389	end = pos + `2`;//end now points to the first char after the encoded string
390	if ( i >= maxLen \|\| !*pos ) {
391	valid = false;
392	}
393	}
394	if ( valid ) {
395	ch = *pos;
396	*pos = '\0';
397	cstr = QByteArray (mid).left( (int)( mid - pos - `1` ) );
398	if ( encoding == 'Q' ) {
399	// decode quoted printable text
400	for ( i = cstr.length () - `1`; i >= `0`; --i ) {
401	if ( cstr [i] == '_' ) {
402	cstr [i] = ' ';
403	}
404	}
405	// kDebug() << "before QP '"
406	// << cstr << "'";
407	cstr = KCodecs::quotedPrintableDecode( cstr );
408	// kDebug() << "after QP '"
409	// << cstr << "'";
410	} else {
411	// decode base64 text
412	cstr = QByteArray::fromBase64( cstr );
413	}
414	*pos = ch;
415	int len = cstr.length();
416	for ( i = `0`; i < len; ++i ) {
417	result += cstr [i];
418	}
419
420	pos = end - `1`;
421	} else {
422	// kDebug() << "invalid";
423	//result += "=?";
424	//pos = beg -1; // because pos gets increased shortly afterwards
425	pos = beg - `2`;
426	result += *pos++;
427	result += *pos;
428	}
429	}
430	if ( !charset.isEmpty () ) {
431	QTextCodec *aCodec = codecForName( QLatin1String (charset.toLatin1 ()) );
432	if ( aCodec ) {
433	// kDebug() << "Codec is" << aCodec->name();
434	return aCodec->toUnicode( result );
435	}
436	}
437	return QLatin1String (result);
438	}
439
440	//-----------------------------------------------------------------------------
441	const QString KIMAP::encodeRFC2047String( const QString &str )
442	{
443	return QLatin1String (encodeRFC2047String( str.toLatin1() ));
444	}
445
446	//-----------------------------------------------------------------------------
447	const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str )
448	{
449	if ( str.isEmpty () ) {
450	return str;
451	}
452
453	const signed char *latin =
454	reinterpret_cast<const signed char *>
455	( str.data() ), l, start, *stop;
456	char hexcode;
457	int numQuotes, i;
458	int rptr = `0`;
459	// My stats show this number results in 12 resize() out of 73,000
460	int resultLen = `3` * str.length() / `2`;
461	QByteArray result( resultLen, '\0' );
462
463	while ( *latin ) {
464	l = latin;
465	start = latin;
466	while ( *l ) {
467	if ( *l == `32` ) {
468	start = l + `1`;
469	}
470	if ( *l < `0` ) {
471	break;
472	}
473	l++;
474	}
475	if ( *l ) {
476	numQuotes = `1`;
477	while ( *l ) {
478	/ The encoded word must be limited to 75 character /
479	for ( i = `0`; i < `16`; ++i ) {
480	if ( *l == especials[i] ) {
481	numQuotes++;
482	}
483	}
484	if ( *l < `0` ) {
485	numQuotes++;
486	}
487	/ Stop after 58 = 75 - 17 characters or at "<user@host..." /
488	if ( l - start + `2` * numQuotes >= `58` \|\| *l == `60` ) {
489	break;
490	}
491	l++;
492	}
493	if ( *l ) {
494	stop = l - `1`;
495	while ( stop >= start && *stop != `32` ) {
496	stop--;
497	}
498	if ( stop <= start ) {
499	stop = l;
500	}
501	} else {
502	stop = l;
503	}
504	if ( resultLen - rptr - `1` <= start - latin + `1` + `16` ) {
505	// =?iso-88...
506	resultLen += ( start - latin + `1` ) * `2` + `20`; // more space
507	result.resize( resultLen );
508	}
509	while ( latin < start ) {
510	result [rptr++] = *latin;
511	latin++;
512	}
513	result.replace( rptr, `15`, "=?iso-8859-1?q?" );
514	rptr += `15`;
515	if ( resultLen - rptr - `1` <= `3` * ( stop - latin + `1` ) ) {
516	resultLen += ( stop - latin + `1` ) * `4` + `20`; // more space
517	result.resize( resultLen );
518	}
519	while ( latin < stop ) {
520	// can add up to 3 chars/iteration
521	numQuotes = `0`;
522	for ( i = `0`; i < `16`; ++i ) {
523	if ( *latin == especials[i] ) {
524	numQuotes = `1`;
525	}
526	}
527	if ( *latin < `0` ) {
528	numQuotes = `1`;
529	}
530	if ( numQuotes ) {
531	result [rptr++] = '=';
532	hexcode = ( ( *latin & `0xF0` ) >> `4` ) + `48`;
533	if ( hexcode >= `58` ) {
534	hexcode += `7`;
535	}
536	result [rptr++] = hexcode;
537	hexcode = ( *latin & `0x0F` ) + `48`;
538	if ( hexcode >= `58` ) {
539	hexcode += `7`;
540	}
541	result [rptr++] = hexcode;
542	} else {
543	result [rptr++] = *latin;
544	}
545	latin++;
546	}
547	result [rptr++] = '?';
548	result [rptr++] = '=';
549	} else {
550	while ( *latin ) {
551	if ( rptr == resultLen - `1` ) {
552	resultLen += `30`;
553	result.resize( resultLen );
554	}
555	result [rptr++] = *latin;
556	latin++;
557	}
558	}
559	}
560	result [rptr] = `0`;
561	return result;
562	}
563
564	//-----------------------------------------------------------------------------
565	const QString KIMAP::encodeRFC2231String( const QString &str )
566	{
567	if ( str.isEmpty () ) {
568	return str;
569	}
570
571	signed char latin = (signed* char *)calloc( `1`, str.length () + `1` );
572	char latin_us = (char* *)latin;
573	strcpy( latin_us, str.toLatin1 () );
574	signed char *l = latin;
575	char hexcode;
576	int i;
577	bool quote;
578	while ( *l ) {
579	if ( *l < `0` ) {
580	break;
581	}
582	l++;
583	}
584	if ( !*l ) {
585	free( latin );
586	return str;
587	}
588	QByteArray result;
589	l = latin;
590	while ( *l ) {
591	quote = *l < `0`;
592	for ( i = `0`; i < `16`; ++i ) {
593	if ( *l == especials[i] ) {
594	quote = true;
595	}
596	}
597	if ( quote ) {
598	result += '%';
599	hexcode = ( ( *l & `0xF0` ) >> `4` ) + `48`;
600	if ( hexcode >= `58` ) {
601	hexcode += `7`;
602	}
603	result += hexcode;
604	hexcode = ( *l & `0x0F` ) + `48`;
605	if ( hexcode >= `58` ) {
606	hexcode += `7`;
607	}
608	result += hexcode;
609	} else {
610	result += *l;
611	}
612	l++;
613	}
614	free( latin );
615	return QLatin1String (result);
616	}
617
618	//-----------------------------------------------------------------------------
619	const QString KIMAP::decodeRFC2231String( const QString &str )
620	{
621	int p = str.indexOf ( QLatin1Char ('\'') );
622
623	//see if it is an rfc string
624	if ( p < `0` ) {
625	return str;
626	}
627
628	int l = str.lastIndexOf( QLatin1Char ('\'') );
629
630	//second is language
631	if ( p >= l ) {
632	return str;
633	}
634
635	//first is charset or empty
636	//QString charset = str.left ( p );
637	QString st = str.mid ( l + `1` );
638	//QString language = str.mid ( p + 1, l - p - 1 );
639
640	//kDebug() << "Charset:" << charset << "Language:" << language;
641
642	char ch, ch2;
643	p = `0`;
644	while ( p < (int) st.length () ) {
645	if ( st.at( p ) == `37` ) {
646	ch = st.at( p + `1` ).toLatin1 () - `48`;
647	if ( ch > `16` ) {
648	ch -= `7`;
649	}
650	ch2 = st.at( p + `2` ).toLatin1 () - `48`;
651	if ( ch2 > `16` ) {
652	ch2 -= `7`;
653	}
654	st.replace( p, `1`, ch * `16` + ch2 );
655	st.remove ( p + `1`, `2` );
656	}
657	p++;
658	}
659	return st;
660	}
661