kmime_util.cpp [kdepimlibs/kmime/kmime_util.cpp]

1	/*
2	kmime_util.cpp
3
4	KMime, the KDE Internet mail/usenet news message library.
5	Copyright (c) 2001 the KMime authors.
6	See file AUTHORS for details
7
8	This library is free software; you can redistribute it and/or
9	modify it under the terms of the GNU Library General Public
10	License as published by the Free Software Foundation; either
11	version 2 of the License, or (at your option) any later version.
12
13	This library is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	Library General Public License for more details.
17
18	You should have received a copy of the GNU Library General Public License
19	along with this library; see the file COPYING.LIB. If not, write to
20	the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21	Boston, MA 02110-1301, USA.
22	*/
23
24	#include "kmime_util.h"
25	#include "kmime_util_p.h"
26
27	#include "kmime_charfreq.h"
28	#include "kmime_codecs.h"
29	#include "kmime_header_parsing.h"
30	#include "kmime_message.h"
31	#include "kmime_warning.h"
32
33	#include <config-kmime.h>
34	#include <kdefakes.h> // for strcasestr
35	#include <kglobal.h>
36	#include <klocale.h>
37	#include <klocalizedstring.h>
38	#include <kcharsets.h>
39	#include <kcodecs.h>
40	#include <kdebug.h>
41
42	#include <QtCore/QList>
43	#include <QtCore/QString>
44	#include <QtCore/QTextCodec>
45
46	#include <ctype.h>
47	#include <time.h>
48	#include <stdlib.h>
49	#include <unistd.h>
50
51	using namespace KMime;
52
53	namespace KMime {
54
// Charset names handed out so far by cachedCharset(); new entries are stored upper-cased.
55	QList<QByteArray> c_harsetCache;
// Language tags handed out so far by cachedLanguage(); new entries are stored upper-cased.
56	QList<QByteArray> l_anguageCache;
// Charset consulted by decodeRFC2047String() when the UTF-8 decode produced U+FFFD.
// An empty string disables that fallback.
57	QString f_allbackCharEnc;
// Value behind setUseOutlookAttachmentEncoding() / useOutlookAttachmentEncoding(); off by default.
58	bool u_seOutlookEncoding = false;
59
60	QByteArray cachedCharset( const QByteArray &name )
61	{
62	foreach ( const QByteArray& charset, c_harsetCache ) {
63	if ( qstricmp( name.data(), charset.data() ) == `0` ) {
64	return charset;
65	}
66	}
67
68	c_harsetCache.append( name.toUpper() );
69	//kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
70	return c_harsetCache.last();
71	}
72
73	QByteArray cachedLanguage( const QByteArray &name )
74	{
75	foreach ( const QByteArray& language, l_anguageCache ) {
76	if ( qstricmp( name.data(), language.data() ) == `0` ) {
77	return language;
78	}
79	}
80
81	l_anguageCache.append( name.toUpper() );
82	//kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
83	return l_anguageCache.last();
84	}
85
86	bool isUsAscii( const QString &s )
87	{
88	uint sLength = s.length();
89	for ( uint i=`0`; i<sLength; i++ ) {
90	if ( s.at( i ).toLatin1() <= `0` ) { // c==0: non-latin1, c<0: non-us-ascii
91	return false;
92	}
93	}
94	return true;
95	}
96
97	QString nameForEncoding( Headers::contentEncoding enc )
98	{
99	switch ( enc ) {
100	case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
101	case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
102	case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
103	case Headers::CEbase64: return QString::fromLatin1( "base64" );
104	case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
105	case Headers::CEbinary: return QString::fromLatin1( "binary" );
106	default: return QString::fromLatin1( "unknown" );
107	}
108	}
109
110	QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
111	{
112	QList<Headers::contentEncoding> allowed;
113	CharFreq cf( data );
114
115	switch ( cf.type() ) {
116	case CharFreq::SevenBitText:
117	allowed << Headers::CE7Bit;
118	case CharFreq::EightBitText:
119	allowed << Headers::CE8Bit;
120	case CharFreq::SevenBitData:
121	if ( cf.printableRatio() > `5.0`/`6.0` ) {
122	// let n the length of data and p the number of printable chars.
123	// Then base64 \approx 4n/3; qp \approx p + 3(n-p)
124	// => qp < base64 iff p > 5n/6.
125	allowed << Headers::CEquPr;
126	allowed << Headers::CEbase64;
127	} else {
128	allowed << Headers::CEbase64;
129	allowed << Headers::CEquPr;
130	}
131	break;
132	case CharFreq::EightBitData:
133	allowed << Headers::CEbase64;
134	break;
135	case CharFreq::None:
136	default:
137	Q_ASSERT( false );
138	}
139
140	return allowed;
141	}
142
// Character-class bitmaps. Each table is 16 bytes = 128 bits, one bit per
// 7-bit ASCII code; byte k covers codes 8*k .. 8*k+7. The per-row comments
// and the listed characters are consistent with an MSB-first layout (the
// top bit of byte k stands for code 8*k).
// NOTE(review): the lookup helper is not defined in this file - confirm
// the bit order against it before editing any value.
//
// RFC 822 "specials":
143	// "(),.:;<>@[\]
144	const uchar specialsMap[`16`] = {
145	`0x00`, `0x00`, `0x00`, `0x00`, // CTLs
146	`0x20`, `0xCA`, `0x00`, `0x3A`, // SPACE ... '?'
147	`0x80`, `0x00`, `0x00`, `0x1C`, // '@' ... '_'
148	`0x00`, `0x00`, `0x00`, `0x00` // '`' ... DEL
149	};
150
// MIME "tspecials": like the set above, without '.', plus '/', '=' and '?'.
151	// "(),:;<>@[\]/=?
152	const uchar tSpecialsMap[`16`] = {
153	`0x00`, `0x00`, `0x00`, `0x00`, // CTLs
154	`0x20`, `0xC9`, `0x00`, `0x3F`, // SPACE ... '?'
155	`0x80`, `0x00`, `0x00`, `0x1C`, // '@' ... '_'
156	`0x00`, `0x00`, `0x00`, `0x00` // '`' ... DEL
157	};
158
// Characters allowed in an atom.
159	// all except specials, CTLs, SPACE.
160	const uchar aTextMap[`16`] = {
161	`0x00`, `0x00`, `0x00`, `0x00`,
162	`0x5F`, `0x35`, `0xFF`, `0xC5`,
163	`0x7F`, `0xFF`, `0xFF`, `0xE3`,
164	`0xFF`, `0xFF`, `0xFF`, `0xFE`
165	};
166
// Characters allowed in a MIME token.
167	// all except tspecials, CTLs, SPACE.
168	const uchar tTextMap[`16`] = {
169	`0x00`, `0x00`, `0x00`, `0x00`,
170	`0x5F`, `0x36`, `0xFF`, `0xC0`,
171	`0x7F`, `0xFF`, `0xFF`, `0xE3`,
172	`0xFF`, `0xFF`, `0xFF`, `0xFE`
173	};
174
// Restricted set: letters, digits and the five punctuation characters listed.
175	// none except a-zA-Z0-9!+-/*
176	const uchar eTextMap[`16`] = {
177	`0x00`, `0x00`, `0x00`, `0x00`,
178	`0x40`, `0x35`, `0xFF`, `0xC0`,
179	`0x7F`, `0xFF`, `0xFF`, `0xE0`,
180	`0x7F`, `0xFF`, `0xFF`, `0xE0`
181	};
182
183	void setFallbackCharEncoding(const QString& fallbackCharEnc)
184	{
185	f_allbackCharEnc = fallbackCharEnc;
186	}
187
188	QString fallbackCharEncoding()
189	{
190	return f_allbackCharEnc;
191	}
192
193	void setUseOutlookAttachmentEncoding( bool violateStandard )
194	{
195	u_seOutlookEncoding = violateStandard;
196	}
197
198	bool useOutlookAttachmentEncoding()
199	{
200	return u_seOutlookEncoding;
201	}
202
203
204	QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
205	const QByteArray &defaultCS, bool forceCS )
206	{
207	QByteArray result;
208	QByteArray spaceBuffer;
209	const char *scursor = src.constData();
210	const char *send = scursor + src.length();
211	bool onlySpacesSinceLastWord = false;
212
213	while ( scursor != send ) {
214	// space
215	if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
216	spaceBuffer += *scursor++;
217	continue;
218	}
219
220	// possible start of an encoded word
221	if ( *scursor == '=' ) {
222	QByteArray language;
223	QString decoded;
224	++scursor;
225	const char *start = scursor;
226	if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
227	result += decoded.toUtf8();
228	onlySpacesSinceLastWord = true;
229	spaceBuffer.clear();
230	} else {
231	if ( onlySpacesSinceLastWord ) {
232	result += spaceBuffer;
233	onlySpacesSinceLastWord = false;
234	}
235	result += '=';
236	scursor = start; // reset cursor after parsing failure
237	}
238	continue;
239	} else {
240	// unencoded data
241	if ( onlySpacesSinceLastWord ) {
242	result += spaceBuffer;
243	onlySpacesSinceLastWord = false;
244	}
245	result += *scursor;
246	++scursor;
247	}
248	}
249	// If there are any chars that couldn't be decoded in UTF-8,
250	// use the fallback charset if it exists
251	const QString tryUtf8 = QString::fromUtf8( result );
252	if ( tryUtf8.contains( `0xFFFD` ) && !f_allbackCharEnc.isEmpty() ) {
253	QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
254	return codec->toUnicode( result );
255	} else {
256	return tryUtf8;
257	}
258	}
259
260	QString decodeRFC2047String( const QByteArray &src )
261	{
262	QByteArray usedCS;
263	return decodeRFC2047String( src, usedCS, "utf-8", false );
264	}
265
266	static const char *reservedCharacters = "\"()<>@,.;:\\[]=";
267
268	QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
269	bool addressHeader, bool allow8BitHeaders )
270	{
271	QByteArray result;
272	int start=`0`, end=`0`;
273	bool nonAscii=false, ok=true, useQEncoding=false;
274
275	// fromLatin1() is safe here, codecForName() uses toLatin1() internally
276	const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
277
278	QByteArray usedCS;
279	if ( !ok ) {
280	//no codec available => try local8Bit and hope the best ;-)
281	usedCS = KGlobal::locale()->encoding();
282	codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
283	} else {
284	Q_ASSERT( codec );
285	if ( charset.isEmpty() ) {
286	usedCS = codec->name();
287	} else {
288	usedCS = charset;
289	}
290	}
291
292	QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
293	QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
294	if ( converterState.invalidChars > `0` ) {
295	usedCS = "utf-8";
296	codec = QTextCodec::codecForName( usedCS );
297	encoded8Bit = codec->fromUnicode( src );
298	}
299
300	if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
301	useQEncoding = true;
302	}
303
304	if ( allow8BitHeaders ) {
305	return encoded8Bit;
306	}
307
308	uint encoded8BitLength = encoded8Bit.length();
309	for ( unsigned int i=`0`; i<encoded8BitLength; i++ ) {
310	if ( encoded8Bit [i] == ' ' ) { // encoding starts at word boundaries
311	start = i + `1`;
312	}
313
314	// encode escape character, for japanese encodings...
315	if ( ( (signed char)encoded8Bit [i] < `0` ) \|\| ( encoded8Bit [i] == '\033' ) \|\|
316	( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit [i] ) != `0` ) ) ) {
317	end = start; // non us-ascii char found, now we determine where to stop encoding
318	nonAscii = true;
319	break;
320	}
321	}
322
323	if ( nonAscii ) {
324	while ( ( end < encoded8Bit.length() ) && ( encoded8Bit [end] != ' ' ) ) {
325	// we encode complete words
326	end++;
327	}
328
329	for ( int x=end; x<encoded8Bit.length(); x++ ) {
330	if ( ( (signed char)encoded8Bit [x] < `0` ) \|\| ( encoded8Bit [x] == '\033' ) \|\|
331	( addressHeader && ( strchr( reservedCharacters, encoded8Bit [x] ) != `0` ) ) ) {
332	end = x; // we found another non-ascii word
333
334	while ( ( end < encoded8Bit.length() ) && ( encoded8Bit [end] != ' ' ) ) {
335	// we encode complete words
336	end++;
337	}
338	}
339	}
340
341	result = encoded8Bit.left( start ) + "=?" + usedCS;
342
343	if ( useQEncoding ) {
344	result += "?Q?";
345
346	char c, hexcode;// "Q"-encoding implementation described in RFC 2047
347	for ( int i=start; i<end; i++ ) {
348	c = encoded8Bit [i];
349	if ( c == ' ' ) { // make the result readable with not MIME-capable readers
350	result += '_';
351	} else {
352	if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) \|\| // paranoid mode, encode all* special chars to avoid problems*
353	( ( c >= 'A' ) && ( c <= 'Z' ) ) \|\| // with "From" & "To" headers
354	( ( c >= '0' ) && ( c <= '9' ) ) ) {
355	result += c;
356	} else {
357	result += '='; // "stolen" from KMail ;-)
358	hexcode = ( ( c & `0xF0` ) >> `4` ) + `48`;
359	if ( hexcode >= `58` ) {
360	hexcode += `7`;
361	}
362	result += hexcode;
363	hexcode = ( c & `0x0F` ) + `48`;
364	if ( hexcode >= `58` ) {
365	hexcode += `7`;
366	}
367	result += hexcode;
368	}
369	}
370	}
371	} else {
372	result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
373	}
374
375	result +="?=";
376	result += encoded8Bit.right( encoded8Bit.length() - end );
377	} else {
378	result = encoded8Bit;
379	}
380
381	return result;
382	}
383
384	QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset )
385	{
386	QByteArray result;
387	QList<QChar> splitChars;
388	splitChars << QLatin1Char ( ',' ) << QLatin1Char ( '\"' ) << QLatin1Char ( ';' ) << QLatin1Char ( '\\' );
389	const QChar *ch = src.constData();
390	const int length = src.length();
391	int pos = `0`;
392	int wordStart = `0`;
393
394	//qDebug() << "Input:" << src;
395	// Loop over all characters of the string.
396	// When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
397	while ( pos < length ) {
398	//qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
399	const bool isAscii = ch->unicode() < `127`;
400	const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != `0` );
401	if ( isAscii && isReserved ) {
402	const int wordSize = pos - wordStart;
403	if ( wordSize > `0` ) {
404	const QString word = src.mid( wordStart, wordSize );
405	result += encodeRFC2047String( word, charset );
406	}
407
408	result += ch->toLatin1();
409	wordStart = pos + `1`;
410	}
411	ch++;
412	pos++;
413	}
414
415	// Encode the last word
416	const int wordSize = pos - wordStart;
417	if ( wordSize > `0` ) {
418	const QString word = src.mid( wordStart, pos - wordStart );
419	result += encodeRFC2047String( word, charset );
420	}
421
422	return result;
423	}
424
425
426
427	//-----------------------------------------------------------------------------
428	QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
429	{
430	if ( str.isEmpty() ) {
431	return QByteArray ();
432	}
433
434	const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
435	QByteArray latin;
436	if ( charset == "us-ascii" ) {
437	latin = str.toLatin1();
438	} else if ( codec ) {
439	latin = codec->fromUnicode( str );
440	} else {
441	latin = str.toLocal8Bit();
442	}
443
444	char *l;
445	for ( l = latin.data(); *l; ++l ) {
446	if ( ( ( l & `0xE0` ) == `0` ) \|\| ( l & `0x80` ) ) {
447	// l is control character or 8-bit char*
448	break;
449	}
450	}
451	if ( !*l ) {
452	return latin;
453	}
454
455	QByteArray result = charset + "''";
456	for ( l = latin.data(); *l; ++l ) {
457	bool needsQuoting = ( l & `0x80` ) \|\| ( l == '%' );
458	if ( !needsQuoting ) {
459	const QByteArray especials = "()<>@,;:\"/[]?.= \033";
460	int len = especials.length();
461	for ( int i = `0`; i < len; i++ ) {
462	if ( *l == especials [i] ) {
463	needsQuoting = true;
464	break;
465	}
466	}
467	}
468	if ( needsQuoting ) {
469	result += '%';
470	unsigned char hexcode;
471	hexcode = ( ( *l & `0xF0` ) >> `4` ) + `48`;
472	if ( hexcode >= `58` ) {
473	hexcode += `7`;
474	}
475	result += hexcode;
476	hexcode = ( *l & `0x0F` ) + `48`;
477	if ( hexcode >= `58` ) {
478	hexcode += `7`;
479	}
480	result += hexcode;
481	} else {
482	result += *l;
483	}
484	}
485	return result;
486	}
487
488
489	//-----------------------------------------------------------------------------
490	QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
491	bool forceCS )
492	{
493	int p = str.indexOf( '\'' );
494	if ( p < `0` ) {
495	return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
496	}
497
498
499	QByteArray charset = str.left( p );
500
501	QByteArray st = str.mid( str.lastIndexOf( '\'' ) + `1` );
502
503	char ch, ch2;
504	p = `0`;
505	while ( p < (int)st.length() ) {
506	if ( st.at( p ) == `37` ) {
507	// Only try to decode the percent-encoded character if the percent sign
508	// is really followed by two other characters, see testcase at bug 163024
509	if ( p + `2` < st.length() ) {
510	ch = st.at( p + `1` ) - `48`;
511	if ( ch > `16` ) {
512	ch -= `7`;
513	}
514	ch2 = st.at( p + `2` ) - `48`;
515	if ( ch2 > `16` ) {
516	ch2 -= `7`;
517	}
518	st [p] = ch * `16` + ch2;
519	st.remove( p + `1`, `2` );
520	}
521	}
522	p++;
523	}
524	kDebug() << "Got pre-decoded:" << st;
525	QString result;
526	const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
527	if ( !charsetcodec \|\| forceCS ) {
528	charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
529	}
530
531	usedCS = charsetcodec->name();
532	return charsetcodec->toUnicode( st );
533	}
534
535	QString decodeRFC2231String( const QByteArray &src )
536	{
537	QByteArray usedCS;
538	return decodeRFC2231String( src, usedCS, "utf-8", false );
539	}
540
541	QByteArray uniqueString()
542	{
543	static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
544	time_t now;
545	char p[`11`];
546	int pos, ran;
547	unsigned int timeval;
548
549	p[`10`] = '\0';
550	now = time( `0` );
551	ran = `1` + (int)( `1000.0` * rand() / ( RAND_MAX + `1.0` ) );
552	timeval = ( now / ran ) + getpid();
553
554	for ( int i = `0`; i < `10`; i++ ) {
555	pos = (int) ( `61.0` * rand() / ( RAND_MAX + `1.0` ) );
556	//kDebug() << pos;
557	p[i] = chars[pos];
558	}
559
560	QByteArray ret;
561	ret.setNum( timeval );
562	ret += '.';
563	ret += p;
564
565	return ret;
566	}
567
568	QByteArray multiPartBoundary()
569	{
570	return "nextPart" + uniqueString();
571	}
572
573	QByteArray unfoldHeader( const QByteArray &header )
574	{
575	QByteArray result;
576	if ( header.isEmpty() ) {
577	return result;
578	}
579
580	int pos = `0`, foldBegin = `0`, foldMid = `0`, foldEnd = `0`;
581	while ( ( foldMid = header.indexOf( '\n', pos ) ) >= `0` ) {
582	foldBegin = foldEnd = foldMid;
583	// find the first space before the line-break
584	while ( foldBegin > `0` ) {
585	if ( !QChar::fromLatin1( header [foldBegin - `1`] ).isSpace() ) {
586	break;
587	}
588	--foldBegin;
589	}
590	// find the first non-space after the line-break
591	while ( foldEnd <= header.length() - `1` ) {
592	if ( QChar::fromLatin1( header [foldEnd] ).isSpace() ) {
593	++foldEnd;
594	} else if ( foldEnd > `0` && header [foldEnd - `1`] == '\n' &&
595	header [foldEnd] == '=' && foldEnd + `2` < header.length() &&
596	( ( header [foldEnd + `1`] == '0' &&
597	header [foldEnd + `2`] == '9' ) \|\|
598	( header [foldEnd + `1`] == '2' &&
599	header [foldEnd + `2`] == '0' ) ) ) {
600	// bug #86302: malformed header continuation starting with =09/=20
601	foldEnd += `3`;
602	}
603	else {
604	break;
605	}
606	}
607
608	result += header.mid( pos, foldBegin - pos );
609	if ( foldEnd < header.length() - `1` ) {
610	result += ' ';
611	}
612	pos = foldEnd;
613	}
614	const int len = header.length();
615	if ( len > pos ) {
616	result += header.mid( pos, len - pos );
617	}
618	return result;
619	}
620
621	int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
622	{
623	int end = dataBegin;
624	int len = src.length() - `1`;
625
626	if ( folded ) {
627	folded = false*;
628	}
629
630	if ( dataBegin < `0` ) {
631	// Not found
632	return -`1`;
633	}
634
635	if ( dataBegin > len ) {
636	// No data available
637	return len + `1`;
638	}
639
640	// If the first line contains nothing, but the next line starts with a space
641	// or a tab, that means a stupid mail client has made the first header field line
642	// entirely empty, and has folded the rest to the next line(s).
643	if ( src.at( end ) == '\n' && end + `1` < len &&
644	( src [end + `1`] == ' ' \|\| src [end + `1`] == '\t' ) ) {
645
646	// Skip \n and first whitespace
647	dataBegin += `2`;
648	end += `2`;
649	}
650
651	if ( src.at( end ) != '\n' ) { // check if the header is not empty
652	while ( true ) {
653	end = src.indexOf( '\n', end + `1` );
654	if ( end == -`1` \|\| end == len ) {
655	// end of string
656	break;
657	} else if ( src [end + `1`] == ' ' \|\| src [end + `1`] == '\t' \|\|
658	( src [end + `1`] == '=' && end + `3` <= len &&
659	( ( src [end + `2`] == '0' && src [end + `3`] == '9' ) \|\|
660	( src [end + `2`] == '2' && src [end + `3`] == '0' ) ) ) ) {
661	// next line is header continuation or starts with =09/=20 (bug #86302)
662	if ( folded ) {
663	folded = true*;
664	}
665	} else {
666	// end of header (no header continuation)
667	break;
668	}
669	}
670	}
671
672	if ( end < `0` ) {
673	end = len + `1`; //take the rest of the string
674	}
675	return end;
676	}
677
678	int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
679	{
680	QByteArray n = name;
681	n.append( ':' );
682	int begin = -`1`;
683
684	if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == `0` ) {
685	begin = `0`;
686	} else {
687	n.prepend( '\n' );
688	const char *p = strcasestr( src.constData(), n.constData() );
689	if ( !p ) {
690	begin = -`1`;
691	} else {
692	begin = p - src.constData();
693	++begin;
694	}
695	}
696
697	if ( begin > -`1` ) { //there is a header with the given name
698	dataBegin = begin + name.length() + `1`; //skip the name
699	// skip the usual space after the colon
700	if ( src.at( dataBegin ) == ' ' ) {
701	++dataBegin;
702	}
703	end = findHeaderLineEnd( src, dataBegin, folded );
704	return begin;
705
706	} else {
707	end = -`1`;
708	dataBegin = -`1`;
709	return -`1`; //header not found
710	}
711	}
712
713	QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
714	{
715	int begin, end;
716	bool folded;
717	QByteArray result;
718
719	if ( src.isEmpty() \|\| indexOfHeader( src, name, end, begin, &folded ) < `0` ) {
720	return result;
721	}
722
723	if ( begin >= `0` ) {
724	if ( !folded ) {
725	result = src.mid( begin, end - begin );
726	} else {
727	if ( end > begin ) {
728	QByteArray hdrValue = src.mid( begin, end - begin );
729	result = unfoldHeader( hdrValue );
730	}
731	}
732	}
733	return result;
734	}
735
736	QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
737	{
738	int begin, end;
739	bool folded;
740	QList<QByteArray> result;
741	QByteArray copySrc( src );
742
743	if ( indexOfHeader( copySrc, name, end, begin, &folded ) < `0` ) {
744	return result;
745	}
746
747	while ( begin >= `0` ) {
748	if ( !folded ) {
749	result.append( copySrc.mid( begin, end - begin ) );
750	} else {
751	QByteArray hdrValue = copySrc.mid( begin, end - begin );
752	result.append( unfoldHeader( hdrValue ) );
753	}
754
755	// get the next one, a tiny bit ugly, but we don't want the previous to be found again...
756	copySrc = copySrc.mid( end );
757	if ( indexOfHeader( copySrc, name, end, begin, &folded ) < `0` ) {
758	break;
759	}
760	}
761	return result;
762	}
763
764	void removeHeader( QByteArray &header, const QByteArray &name )
765	{
766	int begin, end, dummy;
767	begin = indexOfHeader( header, name, end, dummy );
768	if ( begin >= `0` ) {
769	header.remove( begin, end - begin + `1` );
770	}
771	}
772
773	QByteArray CRLFtoLF( const QByteArray &s )
774	{
775	QByteArray ret = s;
776	ret.replace( "\r\n", "\n" );
777	return ret;
778	}
779
780	QByteArray CRLFtoLF( const char *s )
781	{
782	QByteArray ret = s;
783	return CRLFtoLF( ret );
784	}
785
786	QByteArray LFtoCRLF( const QByteArray &s )
787	{
788	QByteArray ret = s;
789	ret.replace( '\n', "\r\n" );
790	return ret;
791	}
792
793	QByteArray LFtoCRLF( const char *s )
794	{
795	QByteArray ret = s;
796	return LFtoCRLF( ret );
797	}
798
namespace {
// Strips every double quote from @p str in place. Inside a quoted section
// a backslash is removed as well and the character it escaped is kept
// (and not examined any further).
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  bool inQuote = false;
  int i = 0;
  while ( i < str.length() ) {
    if ( str[i] == CharType( '"' ) ) {
      str.remove( i, 1 );
      inQuote = !inQuote;
      continue; // position i now holds the next character
    }
    if ( inQuote && ( str[i] == CharType( '\\' ) ) ) {
      // drop the backslash; the increment below steps over the escaped char
      str.remove( i, 1 );
    }
    ++i;
  }
}
}
816
817	void removeQuots( QByteArray &str )
818	{
819	removeQuotesGeneric<QByteArray, char>( str );
820	}
821
822	void removeQuots( QString &str )
823	{
824	removeQuotesGeneric<QString, QLatin1Char>( str );
825	}
826
827	template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString>
828	void addQuotes_impl( StringType &str, bool forceQuotes )
829	{
830	bool needsQuotes=false;
831	for ( int i=`0`; i < str.length(); i++ ) {
832	const CharType cur = str.at( i );
833	if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String ( "\"\|\\\\\|=\|\\]\|\\[\|:\|;\|,\|\\.\|,\|@\|<\|>\|\\)\|\\(" ) ) ) ) {
834	needsQuotes = true;
835	}
836	if ( cur == CharConverterType( '\\' ) \|\| cur == CharConverterType( '\"' ) ) {
837	str.insert( i, CharConverterType( '\\' ) );
838	i++;
839	}
840	}
841
842	if ( needsQuotes \|\| forceQuotes ) {
843	str.insert( `0`, CharConverterType( '\"' ) );
844	str.append( StringConverterType( "\"" ) );
845	}
846	}
847
848	void addQuotes( QByteArray &str, bool forceQuotes )
849	{
850	addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
851	}
852
853	void addQuotes( QString &str, bool forceQuotes )
854	{
855	addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
856	}
857
858	KMIME_EXPORT QString balanceBidiState( const QString &input )
859	{
860	const int LRO = `0x202D`;
861	const int RLO = `0x202E`;
862	const int LRE = `0x202A`;
863	const int RLE = `0x202B`;
864	const int PDF = `0x202C`;
865
866	QString result = input;
867
868	int openDirChangers = `0`;
869	int numPDFsRemoved = `0`;
870	for ( int i = `0`; i < input.length(); i++ ) {
871	const ushort &code = input.at( i ).unicode();
872	if ( code == LRO \|\| code == RLO \|\| code == LRE \|\| code == RLE ) {
873	openDirChangers++;
874	} else if ( code == PDF ) {
875	if ( openDirChangers > `0` ) {
876	openDirChangers--;
877	} else {
878	// One PDF too much, remove it
879	kWarning () << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
880	result.remove( i - numPDFsRemoved, `1` );
881	numPDFsRemoved++;
882	}
883	}
884	}
885
886	if ( openDirChangers > `0` ) {
887	kWarning () << "Possible Unicode spoofing detected in" << input;
888
889	// At PDF chars to the end until the correct state is restored.
890	// As a special exception, when encountering quoted strings, place the PDF before
891	// the last quote.
892	for ( int i = openDirChangers; i > `0`; i-- ) {
893	if ( result.endsWith( QLatin1Char ( '"' ) ) ) {
894	result.insert( result.length() - `1`, QChar ( PDF ) );
895	} else {
896	result += QChar ( PDF );
897	}
898	}
899	}
900
901	return result;
902	}
903
904	QString removeBidiControlChars( const QString &input )
905	{
906	const int LRO = `0x202D`;
907	const int RLO = `0x202E`;
908	const int LRE = `0x202A`;
909	const int RLE = `0x202B`;
910	QString result = input;
911	result.remove( LRO );
912	result.remove( RLO );
913	result.remove( LRE );
914	result.remove( RLE );
915	return result;
916	}
917
918	static bool isCryptoPart( Content* content )
919	{
920	if ( !content->contentType( false ) ) {
921	return false;
922	}
923
924	if ( content->contentType()->subType().toLower() == "octet-stream" &&
925	!content->contentDisposition( false ) ) {
926	return false;
927	}
928
929	const Headers::ContentType *contentType = content->contentType();
930	const QByteArray lowerSubType = contentType->subType().toLower();
931	return ( contentType->mediaType().toLower() == "application" &&
932	( lowerSubType == "pgp-encrypted" \|\|
933	lowerSubType == "pgp-signature" \|\|
934	lowerSubType == "pkcs7-mime" \|\|
935	lowerSubType == "pkcs7-signature" \|\|
936	lowerSubType == "x-pkcs7-signature" \|\|
937	( lowerSubType == "octet-stream" &&
938	content->contentDisposition()->filename().toLower() == QLatin1String ( "msg.asc" ) ) ) );
939	}
940
941	bool hasAttachment( Content* content )
942	{
943	if ( !content ) {
944	return false;
945	}
946
947	bool emptyFilename = true;
948	if ( content->contentDisposition( false ) &&
949	!content->contentDisposition()->filename().isEmpty() ) {
950	emptyFilename = false;
951	}
952
953	if ( emptyFilename &&
954	content->contentType( false ) &&
955	!content->contentType()->name().isEmpty() ) {
956	emptyFilename = false;
957	}
958
959	// ignore crypto parts
960	if ( !emptyFilename && !isCryptoPart( content ) ) {
961	return true;
962	}
963
964	// Ok, content itself is not an attachment. now we deal with multiparts
965	if ( content->contentType()->isMultipart() ) {
966	Q_FOREACH ( Content *child, content->contents() ) {
967	if ( hasAttachment( child ) ) {
968	return true;
969	}
970	}
971	}
972	return false;
973	}
974
975	bool isSigned( Message *message )
976	{
977	if ( !message ) {
978	return false;
979	}
980
981	const KMime::Headers::ContentType* const contentType = message->contentType();
982	if ( contentType->isSubtype( "signed" ) \|\|
983	contentType->isSubtype( "pgp-signature" ) \|\|
984	contentType->isSubtype( "pkcs7-signature" ) \|\|
985	contentType->isSubtype( "x-pkcs7-signature" ) \|\|
986	message->mainBodyPart( "multipart/signed" ) \|\|
987	message->mainBodyPart( "application/pgp-signature" ) \|\|
988	message->mainBodyPart( "application/pkcs7-signature" ) \|\|
989	message->mainBodyPart( "application/x-pkcs7-signature" ) ) {
990	return true;
991	}
992	return false;
993	}
994
995	bool isEncrypted( Message *message )
996	{
997	if ( !message ) {
998	return false;
999	}
1000
1001	const KMime::Headers::ContentType* const contentType = message->contentType();
1002	if ( contentType->isSubtype( "encrypted" ) \|\|
1003	contentType->isSubtype( "pgp-encrypted" ) \|\|
1004	contentType->isSubtype( "pkcs7-mime" ) \|\|
1005	message->mainBodyPart( "multipart/encrypted" ) \|\|
1006	message->mainBodyPart( "application/pgp-encrypted" ) \|\|
1007	message->mainBodyPart( "application/pkcs7-mime" ) ) {
1008	return true;
1009	}
1010
1011	return false;
1012	}
1013
1014	bool isInvitation( Content *content )
1015	{
1016	if ( !content ) {
1017	return false;
1018	}
1019
1020	const KMime::Headers::ContentType* const contentType = content->contentType( false );
1021
1022	if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) {
1023	return true;
1024	}
1025
1026	return false;
1027	}
1028
1029	} // namespace KMime
1030