1/*
2 kmime_util.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 Copyright (c) 2001 the KMime authors.
6 See file AUTHORS for details
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public License
19 along with this library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
22*/
23
24#include "kmime_util.h"
25#include "kmime_util_p.h"
26
27#include "kmime_charfreq.h"
28#include "kmime_codecs.h"
29#include "kmime_header_parsing.h"
30#include "kmime_message.h"
31#include "kmime_warning.h"
32
33#include <config-kmime.h>
34#include <kdefakes.h> // for strcasestr
35#include <kglobal.h>
36#include <klocale.h>
37#include <klocalizedstring.h>
38#include <kcharsets.h>
39#include <kcodecs.h>
40#include <kdebug.h>
41
42#include <QtCore/QList>
43#include <QtCore/QString>
44#include <QtCore/QTextCodec>
45
46#include <ctype.h>
47#include <time.h>
48#include <stdlib.h>
49#include <unistd.h>
50
51using namespace KMime;
52
53namespace KMime {
54
55QList<QByteArray> c_harsetCache;
56QList<QByteArray> l_anguageCache;
57QString f_allbackCharEnc;
58bool u_seOutlookEncoding = false;
59
60QByteArray cachedCharset( const QByteArray &name )
61{
62 foreach ( const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
64 return charset;
65 }
66 }
67
68 c_harsetCache.append( name.toUpper() );
69 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
70 return c_harsetCache.last();
71}
72
73QByteArray cachedLanguage( const QByteArray &name )
74{
75 foreach ( const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.data(), language.data() ) == 0 ) {
77 return language;
78 }
79 }
80
81 l_anguageCache.append( name.toUpper() );
82 //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
83 return l_anguageCache.last();
84}
85
86bool isUsAscii( const QString &s )
87{
88 uint sLength = s.length();
89 for ( uint i=0; i<sLength; i++ ) {
90 if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
91 return false;
92 }
93 }
94 return true;
95}
96
97QString nameForEncoding( Headers::contentEncoding enc )
98{
99 switch ( enc ) {
100 case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
101 case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
102 case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
103 case Headers::CEbase64: return QString::fromLatin1( "base64" );
104 case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
105 case Headers::CEbinary: return QString::fromLatin1( "binary" );
106 default: return QString::fromLatin1( "unknown" );
107 }
108}
109
110QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
111{
112 QList<Headers::contentEncoding> allowed;
113 CharFreq cf( data );
114
115 switch ( cf.type() ) {
116 case CharFreq::SevenBitText:
117 allowed << Headers::CE7Bit;
118 case CharFreq::EightBitText:
119 allowed << Headers::CE8Bit;
120 case CharFreq::SevenBitData:
121 if ( cf.printableRatio() > 5.0/6.0 ) {
122 // let n the length of data and p the number of printable chars.
123 // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
124 // => qp < base64 iff p > 5n/6.
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
127 } else {
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
130 }
131 break;
132 case CharFreq::EightBitData:
133 allowed << Headers::CEbase64;
134 break;
135 case CharFreq::None:
136 default:
137 Q_ASSERT( false );
138 }
139
140 return allowed;
141}
142
143// "(),.:;<>@[\]
144const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00, // CTLs
146 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
147 0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
148 0x00, 0x00, 0x00, 0x00 // '`' ... DEL
149};
150
151// "(),:;<>@[\]/=?
152const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00, // CTLs
154 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
155 0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
156 0x00, 0x00, 0x00, 0x00 // '`' ... DEL
157};
158
159// all except specials, CTLs, SPACE.
160const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
165};
166
167// all except tspecials, CTLs, SPACE.
168const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
173};
174
175// none except a-zA-Z0-9!*+-/
176const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
181};
182
183void setFallbackCharEncoding(const QString& fallbackCharEnc)
184{
185 f_allbackCharEnc = fallbackCharEnc;
186}
187
188QString fallbackCharEncoding()
189{
190 return f_allbackCharEnc;
191}
192
193void setUseOutlookAttachmentEncoding( bool violateStandard )
194{
195 u_seOutlookEncoding = violateStandard;
196}
197
198bool useOutlookAttachmentEncoding()
199{
200 return u_seOutlookEncoding;
201}
202
203
204QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
205 const QByteArray &defaultCS, bool forceCS )
206{
207 QByteArray result;
208 QByteArray spaceBuffer;
209 const char *scursor = src.constData();
210 const char *send = scursor + src.length();
211 bool onlySpacesSinceLastWord = false;
212
213 while ( scursor != send ) {
214 // space
215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
216 spaceBuffer += *scursor++;
217 continue;
218 }
219
220 // possible start of an encoded word
221 if ( *scursor == '=' ) {
222 QByteArray language;
223 QString decoded;
224 ++scursor;
225 const char *start = scursor;
226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
227 result += decoded.toUtf8();
228 onlySpacesSinceLastWord = true;
229 spaceBuffer.clear();
230 } else {
231 if ( onlySpacesSinceLastWord ) {
232 result += spaceBuffer;
233 onlySpacesSinceLastWord = false;
234 }
235 result += '=';
236 scursor = start; // reset cursor after parsing failure
237 }
238 continue;
239 } else {
240 // unencoded data
241 if ( onlySpacesSinceLastWord ) {
242 result += spaceBuffer;
243 onlySpacesSinceLastWord = false;
244 }
245 result += *scursor;
246 ++scursor;
247 }
248 }
249 // If there are any chars that couldn't be decoded in UTF-8,
250 // use the fallback charset if it exists
251 const QString tryUtf8 = QString::fromUtf8( result );
252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
254 return codec->toUnicode( result );
255 } else {
256 return tryUtf8;
257 }
258}
259
260QString decodeRFC2047String( const QByteArray &src )
261{
262 QByteArray usedCS;
263 return decodeRFC2047String( src, usedCS, "utf-8", false );
264}
265
266static const char *reservedCharacters = "\"()<>@,.;:\\[]=";
267
268QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
269 bool addressHeader, bool allow8BitHeaders )
270{
271 QByteArray result;
272 int start=0, end=0;
273 bool nonAscii=false, ok=true, useQEncoding=false;
274
275 // fromLatin1() is safe here, codecForName() uses toLatin1() internally
276 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
277
278 QByteArray usedCS;
279 if ( !ok ) {
280 //no codec available => try local8Bit and hope the best ;-)
281 usedCS = KGlobal::locale()->encoding();
282 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
283 } else {
284 Q_ASSERT( codec );
285 if ( charset.isEmpty() ) {
286 usedCS = codec->name();
287 } else {
288 usedCS = charset;
289 }
290 }
291
292 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
293 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
294 if ( converterState.invalidChars > 0 ) {
295 usedCS = "utf-8";
296 codec = QTextCodec::codecForName( usedCS );
297 encoded8Bit = codec->fromUnicode( src );
298 }
299
300 if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
301 useQEncoding = true;
302 }
303
304 if ( allow8BitHeaders ) {
305 return encoded8Bit;
306 }
307
308 uint encoded8BitLength = encoded8Bit.length();
309 for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
310 if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
311 start = i + 1;
312 }
313
314 // encode escape character, for japanese encodings...
315 if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
316 ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
317 end = start; // non us-ascii char found, now we determine where to stop encoding
318 nonAscii = true;
319 break;
320 }
321 }
322
323 if ( nonAscii ) {
324 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
325 // we encode complete words
326 end++;
327 }
328
329 for ( int x=end; x<encoded8Bit.length(); x++ ) {
330 if ( ( (signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] == '\033' ) ||
331 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
332 end = x; // we found another non-ascii word
333
334 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
335 // we encode complete words
336 end++;
337 }
338 }
339 }
340
341 result = encoded8Bit.left( start ) + "=?" + usedCS;
342
343 if ( useQEncoding ) {
344 result += "?Q?";
345
346 char c, hexcode;// "Q"-encoding implementation described in RFC 2047
347 for ( int i=start; i<end; i++ ) {
348 c = encoded8Bit[i];
349 if ( c == ' ' ) { // make the result readable with not MIME-capable readers
350 result += '_';
351 } else {
352 if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
353 ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers
354 ( ( c >= '0' ) && ( c <= '9' ) ) ) {
355 result += c;
356 } else {
357 result += '='; // "stolen" from KMail ;-)
358 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
359 if ( hexcode >= 58 ) {
360 hexcode += 7;
361 }
362 result += hexcode;
363 hexcode = ( c & 0x0F ) + 48;
364 if ( hexcode >= 58 ) {
365 hexcode += 7;
366 }
367 result += hexcode;
368 }
369 }
370 }
371 } else {
372 result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
373 }
374
375 result +="?=";
376 result += encoded8Bit.right( encoded8Bit.length() - end );
377 } else {
378 result = encoded8Bit;
379 }
380
381 return result;
382}
383
384QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset )
385{
386 QByteArray result;
387 QList<QChar> splitChars;
388 splitChars << QLatin1Char( ',' ) << QLatin1Char( '\"' ) << QLatin1Char( ';' ) << QLatin1Char( '\\' );
389 const QChar *ch = src.constData();
390 const int length = src.length();
391 int pos = 0;
392 int wordStart = 0;
393
394 //qDebug() << "Input:" << src;
395 // Loop over all characters of the string.
396 // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
397 while ( pos < length ) {
398 //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1();
399 const bool isAscii = ch->unicode() < 127;
400 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
401 if ( isAscii && isReserved ) {
402 const int wordSize = pos - wordStart;
403 if ( wordSize > 0 ) {
404 const QString word = src.mid( wordStart, wordSize );
405 result += encodeRFC2047String( word, charset );
406 }
407
408 result += ch->toLatin1();
409 wordStart = pos + 1;
410 }
411 ch++;
412 pos++;
413 }
414
415 // Encode the last word
416 const int wordSize = pos - wordStart;
417 if ( wordSize > 0 ) {
418 const QString word = src.mid( wordStart, pos - wordStart );
419 result += encodeRFC2047String( word, charset );
420 }
421
422 return result;
423}
424
425
426
427//-----------------------------------------------------------------------------
428QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
429{
430 if ( str.isEmpty() ) {
431 return QByteArray();
432 }
433
434 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
435 QByteArray latin;
436 if ( charset == "us-ascii" ) {
437 latin = str.toLatin1();
438 } else if ( codec ) {
439 latin = codec->fromUnicode( str );
440 } else {
441 latin = str.toLocal8Bit();
442 }
443
444 char *l;
445 for ( l = latin.data(); *l; ++l ) {
446 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
447 // *l is control character or 8-bit char
448 break;
449 }
450 }
451 if ( !*l ) {
452 return latin;
453 }
454
455 QByteArray result = charset + "''";
456 for ( l = latin.data(); *l; ++l ) {
457 bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
458 if ( !needsQuoting ) {
459 const QByteArray especials = "()<>@,;:\"/[]?.= \033";
460 int len = especials.length();
461 for ( int i = 0; i < len; i++ ) {
462 if ( *l == especials[i] ) {
463 needsQuoting = true;
464 break;
465 }
466 }
467 }
468 if ( needsQuoting ) {
469 result += '%';
470 unsigned char hexcode;
471 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
472 if ( hexcode >= 58 ) {
473 hexcode += 7;
474 }
475 result += hexcode;
476 hexcode = ( *l & 0x0F ) + 48;
477 if ( hexcode >= 58 ) {
478 hexcode += 7;
479 }
480 result += hexcode;
481 } else {
482 result += *l;
483 }
484 }
485 return result;
486}
487
488
489//-----------------------------------------------------------------------------
490QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
491 bool forceCS )
492{
493 int p = str.indexOf( '\'' );
494 if ( p < 0 ) {
495 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
496 }
497
498
499 QByteArray charset = str.left( p );
500
501 QByteArray st = str.mid( str.lastIndexOf( '\'' ) + 1 );
502
503 char ch, ch2;
504 p = 0;
505 while ( p < (int)st.length() ) {
506 if ( st.at( p ) == 37 ) {
507 // Only try to decode the percent-encoded character if the percent sign
508 // is really followed by two other characters, see testcase at bug 163024
509 if ( p + 2 < st.length() ) {
510 ch = st.at( p + 1 ) - 48;
511 if ( ch > 16 ) {
512 ch -= 7;
513 }
514 ch2 = st.at( p + 2 ) - 48;
515 if ( ch2 > 16 ) {
516 ch2 -= 7;
517 }
518 st[p] = ch * 16 + ch2;
519 st.remove( p + 1, 2 );
520 }
521 }
522 p++;
523 }
524 kDebug() << "Got pre-decoded:" << st;
525 QString result;
526 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
527 if ( !charsetcodec || forceCS ) {
528 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
529 }
530
531 usedCS = charsetcodec->name();
532 return charsetcodec->toUnicode( st );
533}
534
535QString decodeRFC2231String( const QByteArray &src )
536{
537 QByteArray usedCS;
538 return decodeRFC2231String( src, usedCS, "utf-8", false );
539}
540
541QByteArray uniqueString()
542{
543 static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
544 time_t now;
545 char p[11];
546 int pos, ran;
547 unsigned int timeval;
548
549 p[10] = '\0';
550 now = time( 0 );
551 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
552 timeval = ( now / ran ) + getpid();
553
554 for ( int i = 0; i < 10; i++ ) {
555 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
556 //kDebug() << pos;
557 p[i] = chars[pos];
558 }
559
560 QByteArray ret;
561 ret.setNum( timeval );
562 ret += '.';
563 ret += p;
564
565 return ret;
566}
567
568QByteArray multiPartBoundary()
569{
570 return "nextPart" + uniqueString();
571}
572
573QByteArray unfoldHeader( const QByteArray &header )
574{
575 QByteArray result;
576 if ( header.isEmpty() ) {
577 return result;
578 }
579
580 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
581 while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
582 foldBegin = foldEnd = foldMid;
583 // find the first space before the line-break
584 while ( foldBegin > 0 ) {
585 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
586 break;
587 }
588 --foldBegin;
589 }
590 // find the first non-space after the line-break
591 while ( foldEnd <= header.length() - 1 ) {
592 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
593 ++foldEnd;
594 } else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
595 header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
596 ( ( header[foldEnd + 1] == '0' &&
597 header[foldEnd + 2] == '9' ) ||
598 ( header[foldEnd + 1] == '2' &&
599 header[foldEnd + 2] == '0' ) ) ) {
600 // bug #86302: malformed header continuation starting with =09/=20
601 foldEnd += 3;
602 }
603 else {
604 break;
605 }
606 }
607
608 result += header.mid( pos, foldBegin - pos );
609 if ( foldEnd < header.length() - 1 ) {
610 result += ' ';
611 }
612 pos = foldEnd;
613 }
614 const int len = header.length();
615 if ( len > pos ) {
616 result += header.mid( pos, len - pos );
617 }
618 return result;
619}
620
621int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
622{
623 int end = dataBegin;
624 int len = src.length() - 1;
625
626 if ( folded ) {
627 *folded = false;
628 }
629
630 if ( dataBegin < 0 ) {
631 // Not found
632 return -1;
633 }
634
635 if ( dataBegin > len ) {
636 // No data available
637 return len + 1;
638 }
639
640 // If the first line contains nothing, but the next line starts with a space
641 // or a tab, that means a stupid mail client has made the first header field line
642 // entirely empty, and has folded the rest to the next line(s).
643 if ( src.at( end ) == '\n' && end + 1 < len &&
644 ( src[end + 1] == ' ' || src[end + 1] == '\t' ) ) {
645
646 // Skip \n and first whitespace
647 dataBegin += 2;
648 end += 2;
649 }
650
651 if ( src.at( end ) != '\n' ) { // check if the header is not empty
652 while ( true ) {
653 end = src.indexOf( '\n', end + 1 );
654 if ( end == -1 || end == len ) {
655 // end of string
656 break;
657 } else if ( src[end + 1] == ' ' || src[end + 1] == '\t' ||
658 ( src[end + 1] == '=' && end + 3 <= len &&
659 ( ( src[end + 2] == '0' && src[end + 3] == '9' ) ||
660 ( src[end + 2] == '2' && src[end + 3] == '0' ) ) ) ) {
661 // next line is header continuation or starts with =09/=20 (bug #86302)
662 if ( folded ) {
663 *folded = true;
664 }
665 } else {
666 // end of header (no header continuation)
667 break;
668 }
669 }
670 }
671
672 if ( end < 0 ) {
673 end = len + 1; //take the rest of the string
674 }
675 return end;
676}
677
678int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
679{
680 QByteArray n = name;
681 n.append( ':' );
682 int begin = -1;
683
684 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
685 begin = 0;
686 } else {
687 n.prepend( '\n' );
688 const char *p = strcasestr( src.constData(), n.constData() );
689 if ( !p ) {
690 begin = -1;
691 } else {
692 begin = p - src.constData();
693 ++begin;
694 }
695 }
696
697 if ( begin > -1 ) { //there is a header with the given name
698 dataBegin = begin + name.length() + 1; //skip the name
699 // skip the usual space after the colon
700 if ( src.at( dataBegin ) == ' ' ) {
701 ++dataBegin;
702 }
703 end = findHeaderLineEnd( src, dataBegin, folded );
704 return begin;
705
706 } else {
707 end = -1;
708 dataBegin = -1;
709 return -1; //header not found
710 }
711}
712
713QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
714{
715 int begin, end;
716 bool folded;
717 QByteArray result;
718
719 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
720 return result;
721 }
722
723 if ( begin >= 0 ) {
724 if ( !folded ) {
725 result = src.mid( begin, end - begin );
726 } else {
727 if ( end > begin ) {
728 QByteArray hdrValue = src.mid( begin, end - begin );
729 result = unfoldHeader( hdrValue );
730 }
731 }
732 }
733 return result;
734}
735
736QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
737{
738 int begin, end;
739 bool folded;
740 QList<QByteArray> result;
741 QByteArray copySrc( src );
742
743 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
744 return result;
745 }
746
747 while ( begin >= 0 ) {
748 if ( !folded ) {
749 result.append( copySrc.mid( begin, end - begin ) );
750 } else {
751 QByteArray hdrValue = copySrc.mid( begin, end - begin );
752 result.append( unfoldHeader( hdrValue ) );
753 }
754
755 // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
756 copySrc = copySrc.mid( end );
757 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
758 break;
759 }
760 }
761 return result;
762}
763
764void removeHeader( QByteArray &header, const QByteArray &name )
765{
766 int begin, end, dummy;
767 begin = indexOfHeader( header, name, end, dummy );
768 if ( begin >= 0 ) {
769 header.remove( begin, end - begin + 1 );
770 }
771}
772
773QByteArray CRLFtoLF( const QByteArray &s )
774{
775 QByteArray ret = s;
776 ret.replace( "\r\n", "\n" );
777 return ret;
778}
779
780QByteArray CRLFtoLF( const char *s )
781{
782 QByteArray ret = s;
783 return CRLFtoLF( ret );
784}
785
786QByteArray LFtoCRLF( const QByteArray &s )
787{
788 QByteArray ret = s;
789 ret.replace( '\n', "\r\n" );
790 return ret;
791}
792
793QByteArray LFtoCRLF( const char *s )
794{
795 QByteArray ret = s;
796 return LFtoCRLF( ret );
797}
798
namespace {
// Strips the quoting from @p str in place: every double quote is removed,
// and inside a quoted section a backslash is removed while the character
// following it is kept as is.
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  const CharType quote( '"' );
  const CharType backslash( '\\' );

  bool insideQuotes = false;
  int pos = 0;
  while ( pos < str.length() ) {
    if ( str[pos] == quote ) {
      str.remove( pos, 1 );
      insideQuotes = !insideQuotes;
      // the next character moved to this index, look at it right away
      continue;
    }

    if ( insideQuotes && ( str[pos] == backslash ) ) {
      // drop the backslash; the increment below then skips the escaped char
      str.remove( pos, 1 );
    }
    ++pos;
  }
}
}
816
817void removeQuots( QByteArray &str )
818{
819 removeQuotesGeneric<QByteArray, char>( str );
820}
821
822void removeQuots( QString &str )
823{
824 removeQuotesGeneric<QString, QLatin1Char>( str );
825}
826
827template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString>
828void addQuotes_impl( StringType &str, bool forceQuotes )
829{
830 bool needsQuotes=false;
831 for ( int i=0; i < str.length(); i++ ) {
832 const CharType cur = str.at( i );
833 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
834 needsQuotes = true;
835 }
836 if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) {
837 str.insert( i, CharConverterType( '\\' ) );
838 i++;
839 }
840 }
841
842 if ( needsQuotes || forceQuotes ) {
843 str.insert( 0, CharConverterType( '\"' ) );
844 str.append( StringConverterType( "\"" ) );
845 }
846}
847
848void addQuotes( QByteArray &str, bool forceQuotes )
849{
850 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
851}
852
853void addQuotes( QString &str, bool forceQuotes )
854{
855 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
856}
857
858KMIME_EXPORT QString balanceBidiState( const QString &input )
859{
860 const int LRO = 0x202D;
861 const int RLO = 0x202E;
862 const int LRE = 0x202A;
863 const int RLE = 0x202B;
864 const int PDF = 0x202C;
865
866 QString result = input;
867
868 int openDirChangers = 0;
869 int numPDFsRemoved = 0;
870 for ( int i = 0; i < input.length(); i++ ) {
871 const ushort &code = input.at( i ).unicode();
872 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
873 openDirChangers++;
874 } else if ( code == PDF ) {
875 if ( openDirChangers > 0 ) {
876 openDirChangers--;
877 } else {
878 // One PDF too much, remove it
879 kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
880 result.remove( i - numPDFsRemoved, 1 );
881 numPDFsRemoved++;
882 }
883 }
884 }
885
886 if ( openDirChangers > 0 ) {
887 kWarning() << "Possible Unicode spoofing detected in" << input;
888
889 // At PDF chars to the end until the correct state is restored.
890 // As a special exception, when encountering quoted strings, place the PDF before
891 // the last quote.
892 for ( int i = openDirChangers; i > 0; i-- ) {
893 if ( result.endsWith( QLatin1Char( '"' ) ) ) {
894 result.insert( result.length() - 1, QChar( PDF ) );
895 } else {
896 result += QChar( PDF );
897 }
898 }
899 }
900
901 return result;
902}
903
904QString removeBidiControlChars( const QString &input )
905{
906 const int LRO = 0x202D;
907 const int RLO = 0x202E;
908 const int LRE = 0x202A;
909 const int RLE = 0x202B;
910 QString result = input;
911 result.remove( LRO );
912 result.remove( RLO );
913 result.remove( LRE );
914 result.remove( RLE );
915 return result;
916}
917
918static bool isCryptoPart( Content* content )
919{
920 if ( !content->contentType( false ) ) {
921 return false;
922 }
923
924 if ( content->contentType()->subType().toLower() == "octet-stream" &&
925 !content->contentDisposition( false ) ) {
926 return false;
927 }
928
929 const Headers::ContentType *contentType = content->contentType();
930 const QByteArray lowerSubType = contentType->subType().toLower();
931 return ( contentType->mediaType().toLower() == "application" &&
932 ( lowerSubType == "pgp-encrypted" ||
933 lowerSubType == "pgp-signature" ||
934 lowerSubType == "pkcs7-mime" ||
935 lowerSubType == "pkcs7-signature" ||
936 lowerSubType == "x-pkcs7-signature" ||
937 ( lowerSubType == "octet-stream" &&
938 content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) );
939}
940
941bool hasAttachment( Content* content )
942{
943 if ( !content ) {
944 return false;
945 }
946
947 bool emptyFilename = true;
948 if ( content->contentDisposition( false ) &&
949 !content->contentDisposition()->filename().isEmpty() ) {
950 emptyFilename = false;
951 }
952
953 if ( emptyFilename &&
954 content->contentType( false ) &&
955 !content->contentType()->name().isEmpty() ) {
956 emptyFilename = false;
957 }
958
959 // ignore crypto parts
960 if ( !emptyFilename && !isCryptoPart( content ) ) {
961 return true;
962 }
963
964 // Ok, content itself is not an attachment. now we deal with multiparts
965 if ( content->contentType()->isMultipart() ) {
966 Q_FOREACH ( Content *child, content->contents() ) {
967 if ( hasAttachment( child ) ) {
968 return true;
969 }
970 }
971 }
972 return false;
973}
974
975bool isSigned( Message *message )
976{
977 if ( !message ) {
978 return false;
979 }
980
981 const KMime::Headers::ContentType* const contentType = message->contentType();
982 if ( contentType->isSubtype( "signed" ) ||
983 contentType->isSubtype( "pgp-signature" ) ||
984 contentType->isSubtype( "pkcs7-signature" ) ||
985 contentType->isSubtype( "x-pkcs7-signature" ) ||
986 message->mainBodyPart( "multipart/signed" ) ||
987 message->mainBodyPart( "application/pgp-signature" ) ||
988 message->mainBodyPart( "application/pkcs7-signature" ) ||
989 message->mainBodyPart( "application/x-pkcs7-signature" ) ) {
990 return true;
991 }
992 return false;
993}
994
995bool isEncrypted( Message *message )
996{
997 if ( !message ) {
998 return false;
999 }
1000
1001 const KMime::Headers::ContentType* const contentType = message->contentType();
1002 if ( contentType->isSubtype( "encrypted" ) ||
1003 contentType->isSubtype( "pgp-encrypted" ) ||
1004 contentType->isSubtype( "pkcs7-mime" ) ||
1005 message->mainBodyPart( "multipart/encrypted" ) ||
1006 message->mainBodyPart( "application/pgp-encrypted" ) ||
1007 message->mainBodyPart( "application/pkcs7-mime" ) ) {
1008 return true;
1009 }
1010
1011 return false;
1012}
1013
1014bool isInvitation( Content *content )
1015{
1016 if ( !content ) {
1017 return false;
1018 }
1019
1020 const KMime::Headers::ContentType* const contentType = content->contentType( false );
1021
1022 if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) {
1023 return true;
1024 }
1025
1026 return false;
1027}
1028
1029} // namespace KMime
1030