1 | /* |
2 | kmime_util.cpp |
3 | |
4 | KMime, the KDE Internet mail/usenet news message library. |
5 | Copyright (c) 2001 the KMime authors. |
6 | See file AUTHORS for details |
7 | |
8 | This library is free software; you can redistribute it and/or |
9 | modify it under the terms of the GNU Library General Public |
10 | License as published by the Free Software Foundation; either |
11 | version 2 of the License, or (at your option) any later version. |
12 | |
13 | This library is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | Library General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU Library General Public License |
19 | along with this library; see the file COPYING.LIB. If not, write to |
20 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
21 | Boston, MA 02110-1301, USA. |
22 | */ |
23 | |
24 | #include "kmime_util.h" |
25 | #include "kmime_util_p.h" |
26 | |
27 | #include "kmime_charfreq.h" |
28 | #include "kmime_codecs.h" |
29 | #include "kmime_header_parsing.h" |
30 | #include "kmime_message.h" |
31 | #include "kmime_warning.h" |
32 | |
33 | #include <config-kmime.h> |
34 | #include <kdefakes.h> // for strcasestr |
35 | #include <kglobal.h> |
36 | #include <klocale.h> |
37 | #include <klocalizedstring.h> |
38 | #include <kcharsets.h> |
39 | #include <kcodecs.h> |
40 | #include <kdebug.h> |
41 | |
42 | #include <QtCore/QList> |
43 | #include <QtCore/QString> |
44 | #include <QtCore/QTextCodec> |
45 | |
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
50 | |
51 | using namespace KMime; |
52 | |
53 | namespace KMime { |
54 | |
55 | QList<QByteArray> c_harsetCache; |
56 | QList<QByteArray> l_anguageCache; |
57 | QString f_allbackCharEnc; |
58 | bool u_seOutlookEncoding = false; |
59 | |
60 | QByteArray cachedCharset( const QByteArray &name ) |
61 | { |
62 | foreach ( const QByteArray& charset, c_harsetCache ) { |
63 | if ( qstricmp( name.data(), charset.data() ) == 0 ) { |
64 | return charset; |
65 | } |
66 | } |
67 | |
68 | c_harsetCache.append( name.toUpper() ); |
69 | //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); |
70 | return c_harsetCache.last(); |
71 | } |
72 | |
73 | QByteArray cachedLanguage( const QByteArray &name ) |
74 | { |
75 | foreach ( const QByteArray& language, l_anguageCache ) { |
76 | if ( qstricmp( name.data(), language.data() ) == 0 ) { |
77 | return language; |
78 | } |
79 | } |
80 | |
81 | l_anguageCache.append( name.toUpper() ); |
82 | //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count(); |
83 | return l_anguageCache.last(); |
84 | } |
85 | |
86 | bool isUsAscii( const QString &s ) |
87 | { |
88 | uint sLength = s.length(); |
89 | for ( uint i=0; i<sLength; i++ ) { |
90 | if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii |
91 | return false; |
92 | } |
93 | } |
94 | return true; |
95 | } |
96 | |
97 | QString ( Headers::contentEncoding enc ) |
98 | { |
99 | switch ( enc ) { |
100 | case Headers::CE7Bit: return QString::fromLatin1( "7bit" ); |
101 | case Headers::CE8Bit: return QString::fromLatin1( "8bit" ); |
102 | case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" ); |
103 | case Headers::CEbase64: return QString::fromLatin1( "base64" ); |
104 | case Headers::CEuuenc: return QString::fromLatin1( "uuencode" ); |
105 | case Headers::CEbinary: return QString::fromLatin1( "binary" ); |
106 | default: return QString::fromLatin1( "unknown" ); |
107 | } |
108 | } |
109 | |
110 | QList<Headers::contentEncoding> encodingsForData( const QByteArray &data ) |
111 | { |
112 | QList<Headers::contentEncoding> allowed; |
113 | CharFreq cf( data ); |
114 | |
115 | switch ( cf.type() ) { |
116 | case CharFreq::SevenBitText: |
117 | allowed << Headers::CE7Bit; |
118 | case CharFreq::EightBitText: |
119 | allowed << Headers::CE8Bit; |
120 | case CharFreq::SevenBitData: |
121 | if ( cf.printableRatio() > 5.0/6.0 ) { |
122 | // let n the length of data and p the number of printable chars. |
123 | // Then base64 \approx 4n/3; qp \approx p + 3(n-p) |
124 | // => qp < base64 iff p > 5n/6. |
125 | allowed << Headers::CEquPr; |
126 | allowed << Headers::CEbase64; |
127 | } else { |
128 | allowed << Headers::CEbase64; |
129 | allowed << Headers::CEquPr; |
130 | } |
131 | break; |
132 | case CharFreq::EightBitData: |
133 | allowed << Headers::CEbase64; |
134 | break; |
135 | case CharFreq::None: |
136 | default: |
137 | Q_ASSERT( false ); |
138 | } |
139 | |
140 | return allowed; |
141 | } |
142 | |
143 | // "(),.:;<>@[\] |
144 | const uchar specialsMap[16] = { |
145 | 0x00, 0x00, 0x00, 0x00, // CTLs |
146 | 0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?' |
147 | 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' |
148 | 0x00, 0x00, 0x00, 0x00 // '`' ... DEL |
149 | }; |
150 | |
151 | // "(),:;<>@[\]/=? |
152 | const uchar tSpecialsMap[16] = { |
153 | 0x00, 0x00, 0x00, 0x00, // CTLs |
154 | 0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?' |
155 | 0x80, 0x00, 0x00, 0x1C, // '@' ... '_' |
156 | 0x00, 0x00, 0x00, 0x00 // '`' ... DEL |
157 | }; |
158 | |
159 | // all except specials, CTLs, SPACE. |
160 | const uchar aTextMap[16] = { |
161 | 0x00, 0x00, 0x00, 0x00, |
162 | 0x5F, 0x35, 0xFF, 0xC5, |
163 | 0x7F, 0xFF, 0xFF, 0xE3, |
164 | 0xFF, 0xFF, 0xFF, 0xFE |
165 | }; |
166 | |
167 | // all except tspecials, CTLs, SPACE. |
168 | const uchar tTextMap[16] = { |
169 | 0x00, 0x00, 0x00, 0x00, |
170 | 0x5F, 0x36, 0xFF, 0xC0, |
171 | 0x7F, 0xFF, 0xFF, 0xE3, |
172 | 0xFF, 0xFF, 0xFF, 0xFE |
173 | }; |
174 | |
175 | // none except a-zA-Z0-9!*+-/ |
176 | const uchar eTextMap[16] = { |
177 | 0x00, 0x00, 0x00, 0x00, |
178 | 0x40, 0x35, 0xFF, 0xC0, |
179 | 0x7F, 0xFF, 0xFF, 0xE0, |
180 | 0x7F, 0xFF, 0xFF, 0xE0 |
181 | }; |
182 | |
183 | void setFallbackCharEncoding(const QString& fallbackCharEnc) |
184 | { |
185 | f_allbackCharEnc = fallbackCharEnc; |
186 | } |
187 | |
188 | QString fallbackCharEncoding() |
189 | { |
190 | return f_allbackCharEnc; |
191 | } |
192 | |
193 | void setUseOutlookAttachmentEncoding( bool violateStandard ) |
194 | { |
195 | u_seOutlookEncoding = violateStandard; |
196 | } |
197 | |
198 | bool useOutlookAttachmentEncoding() |
199 | { |
200 | return u_seOutlookEncoding; |
201 | } |
202 | |
203 | |
204 | QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS, |
205 | const QByteArray &defaultCS, bool forceCS ) |
206 | { |
207 | QByteArray result; |
208 | QByteArray spaceBuffer; |
209 | const char *scursor = src.constData(); |
210 | const char *send = scursor + src.length(); |
211 | bool onlySpacesSinceLastWord = false; |
212 | |
213 | while ( scursor != send ) { |
214 | // space |
215 | if ( isspace( *scursor ) && onlySpacesSinceLastWord ) { |
216 | spaceBuffer += *scursor++; |
217 | continue; |
218 | } |
219 | |
220 | // possible start of an encoded word |
221 | if ( *scursor == '=' ) { |
222 | QByteArray language; |
223 | QString decoded; |
224 | ++scursor; |
225 | const char *start = scursor; |
226 | if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) { |
227 | result += decoded.toUtf8(); |
228 | onlySpacesSinceLastWord = true; |
229 | spaceBuffer.clear(); |
230 | } else { |
231 | if ( onlySpacesSinceLastWord ) { |
232 | result += spaceBuffer; |
233 | onlySpacesSinceLastWord = false; |
234 | } |
235 | result += '='; |
236 | scursor = start; // reset cursor after parsing failure |
237 | } |
238 | continue; |
239 | } else { |
240 | // unencoded data |
241 | if ( onlySpacesSinceLastWord ) { |
242 | result += spaceBuffer; |
243 | onlySpacesSinceLastWord = false; |
244 | } |
245 | result += *scursor; |
246 | ++scursor; |
247 | } |
248 | } |
249 | // If there are any chars that couldn't be decoded in UTF-8, |
250 | // use the fallback charset if it exists |
251 | const QString tryUtf8 = QString::fromUtf8( result ); |
252 | if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) { |
253 | QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc ); |
254 | return codec->toUnicode( result ); |
255 | } else { |
256 | return tryUtf8; |
257 | } |
258 | } |
259 | |
260 | QString decodeRFC2047String( const QByteArray &src ) |
261 | { |
262 | QByteArray usedCS; |
263 | return decodeRFC2047String( src, usedCS, "utf-8" , false ); |
264 | } |
265 | |
// Characters treated as reserved by the RFC 2047 encoders in this file.
static const char reservedCharacters[] = "\"()<>@,.;:\\[]=";
267 | |
268 | QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset, |
269 | bool , bool ) |
270 | { |
271 | QByteArray result; |
272 | int start=0, end=0; |
273 | bool nonAscii=false, ok=true, useQEncoding=false; |
274 | |
275 | // fromLatin1() is safe here, codecForName() uses toLatin1() internally |
276 | const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok ); |
277 | |
278 | QByteArray usedCS; |
279 | if ( !ok ) { |
280 | //no codec available => try local8Bit and hope the best ;-) |
281 | usedCS = KGlobal::locale()->encoding(); |
282 | codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok ); |
283 | } else { |
284 | Q_ASSERT( codec ); |
285 | if ( charset.isEmpty() ) { |
286 | usedCS = codec->name(); |
287 | } else { |
288 | usedCS = charset; |
289 | } |
290 | } |
291 | |
292 | QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader ); |
293 | QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState ); |
294 | if ( converterState.invalidChars > 0 ) { |
295 | usedCS = "utf-8" ; |
296 | codec = QTextCodec::codecForName( usedCS ); |
297 | encoded8Bit = codec->fromUnicode( src ); |
298 | } |
299 | |
300 | if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets |
301 | useQEncoding = true; |
302 | } |
303 | |
304 | if ( allow8BitHeaders ) { |
305 | return encoded8Bit; |
306 | } |
307 | |
308 | uint encoded8BitLength = encoded8Bit.length(); |
309 | for ( unsigned int i=0; i<encoded8BitLength; i++ ) { |
310 | if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries |
311 | start = i + 1; |
312 | } |
313 | |
314 | // encode escape character, for japanese encodings... |
315 | if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) || |
316 | ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=" , encoded8Bit[i] ) != 0 ) ) ) { |
317 | end = start; // non us-ascii char found, now we determine where to stop encoding |
318 | nonAscii = true; |
319 | break; |
320 | } |
321 | } |
322 | |
323 | if ( nonAscii ) { |
324 | while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { |
325 | // we encode complete words |
326 | end++; |
327 | } |
328 | |
329 | for ( int x=end; x<encoded8Bit.length(); x++ ) { |
330 | if ( ( (signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] == '\033' ) || |
331 | ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) { |
332 | end = x; // we found another non-ascii word |
333 | |
334 | while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) { |
335 | // we encode complete words |
336 | end++; |
337 | } |
338 | } |
339 | } |
340 | |
341 | result = encoded8Bit.left( start ) + "=?" + usedCS; |
342 | |
343 | if ( useQEncoding ) { |
344 | result += "?Q?" ; |
345 | |
346 | char c, hexcode;// "Q"-encoding implementation described in RFC 2047 |
347 | for ( int i=start; i<end; i++ ) { |
348 | c = encoded8Bit[i]; |
349 | if ( c == ' ' ) { // make the result readable with not MIME-capable readers |
350 | result += '_'; |
351 | } else { |
352 | if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems |
353 | ( ( c >= 'A' ) && ( c <= 'Z' ) ) || // with "From" & "To" headers |
354 | ( ( c >= '0' ) && ( c <= '9' ) ) ) { |
355 | result += c; |
356 | } else { |
357 | result += '='; // "stolen" from KMail ;-) |
358 | hexcode = ( ( c & 0xF0 ) >> 4 ) + 48; |
359 | if ( hexcode >= 58 ) { |
360 | hexcode += 7; |
361 | } |
362 | result += hexcode; |
363 | hexcode = ( c & 0x0F ) + 48; |
364 | if ( hexcode >= 58 ) { |
365 | hexcode += 7; |
366 | } |
367 | result += hexcode; |
368 | } |
369 | } |
370 | } |
371 | } else { |
372 | result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64(); |
373 | } |
374 | |
375 | result +="?=" ; |
376 | result += encoded8Bit.right( encoded8Bit.length() - end ); |
377 | } else { |
378 | result = encoded8Bit; |
379 | } |
380 | |
381 | return result; |
382 | } |
383 | |
384 | QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset ) |
385 | { |
386 | QByteArray result; |
387 | QList<QChar> splitChars; |
388 | splitChars << QLatin1Char( ',' ) << QLatin1Char( '\"' ) << QLatin1Char( ';' ) << QLatin1Char( '\\' ); |
389 | const QChar *ch = src.constData(); |
390 | const int length = src.length(); |
391 | int pos = 0; |
392 | int wordStart = 0; |
393 | |
394 | //qDebug() << "Input:" << src; |
395 | // Loop over all characters of the string. |
396 | // When encountering a split character, RFC-2047-encode the word before it, and add it to the result. |
397 | while ( pos < length ) { |
398 | //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toLatin1(); |
399 | const bool isAscii = ch->unicode() < 127; |
400 | const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 ); |
401 | if ( isAscii && isReserved ) { |
402 | const int wordSize = pos - wordStart; |
403 | if ( wordSize > 0 ) { |
404 | const QString word = src.mid( wordStart, wordSize ); |
405 | result += encodeRFC2047String( word, charset ); |
406 | } |
407 | |
408 | result += ch->toLatin1(); |
409 | wordStart = pos + 1; |
410 | } |
411 | ch++; |
412 | pos++; |
413 | } |
414 | |
415 | // Encode the last word |
416 | const int wordSize = pos - wordStart; |
417 | if ( wordSize > 0 ) { |
418 | const QString word = src.mid( wordStart, pos - wordStart ); |
419 | result += encodeRFC2047String( word, charset ); |
420 | } |
421 | |
422 | return result; |
423 | } |
424 | |
425 | |
426 | |
427 | //----------------------------------------------------------------------------- |
428 | QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset ) |
429 | { |
430 | if ( str.isEmpty() ) { |
431 | return QByteArray(); |
432 | } |
433 | |
434 | const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); |
435 | QByteArray latin; |
436 | if ( charset == "us-ascii" ) { |
437 | latin = str.toLatin1(); |
438 | } else if ( codec ) { |
439 | latin = codec->fromUnicode( str ); |
440 | } else { |
441 | latin = str.toLocal8Bit(); |
442 | } |
443 | |
444 | char *l; |
445 | for ( l = latin.data(); *l; ++l ) { |
446 | if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) { |
447 | // *l is control character or 8-bit char |
448 | break; |
449 | } |
450 | } |
451 | if ( !*l ) { |
452 | return latin; |
453 | } |
454 | |
455 | QByteArray result = charset + "''" ; |
456 | for ( l = latin.data(); *l; ++l ) { |
457 | bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' ); |
458 | if ( !needsQuoting ) { |
459 | const QByteArray especials = "()<>@,;:\"/[]?.= \033" ; |
460 | int len = especials.length(); |
461 | for ( int i = 0; i < len; i++ ) { |
462 | if ( *l == especials[i] ) { |
463 | needsQuoting = true; |
464 | break; |
465 | } |
466 | } |
467 | } |
468 | if ( needsQuoting ) { |
469 | result += '%'; |
470 | unsigned char hexcode; |
471 | hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48; |
472 | if ( hexcode >= 58 ) { |
473 | hexcode += 7; |
474 | } |
475 | result += hexcode; |
476 | hexcode = ( *l & 0x0F ) + 48; |
477 | if ( hexcode >= 58 ) { |
478 | hexcode += 7; |
479 | } |
480 | result += hexcode; |
481 | } else { |
482 | result += *l; |
483 | } |
484 | } |
485 | return result; |
486 | } |
487 | |
488 | |
489 | //----------------------------------------------------------------------------- |
490 | QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, |
491 | bool forceCS ) |
492 | { |
493 | int p = str.indexOf( '\'' ); |
494 | if ( p < 0 ) { |
495 | return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str ); |
496 | } |
497 | |
498 | |
499 | QByteArray charset = str.left( p ); |
500 | |
501 | QByteArray st = str.mid( str.lastIndexOf( '\'' ) + 1 ); |
502 | |
503 | char ch, ch2; |
504 | p = 0; |
505 | while ( p < (int)st.length() ) { |
506 | if ( st.at( p ) == 37 ) { |
507 | // Only try to decode the percent-encoded character if the percent sign |
508 | // is really followed by two other characters, see testcase at bug 163024 |
509 | if ( p + 2 < st.length() ) { |
510 | ch = st.at( p + 1 ) - 48; |
511 | if ( ch > 16 ) { |
512 | ch -= 7; |
513 | } |
514 | ch2 = st.at( p + 2 ) - 48; |
515 | if ( ch2 > 16 ) { |
516 | ch2 -= 7; |
517 | } |
518 | st[p] = ch * 16 + ch2; |
519 | st.remove( p + 1, 2 ); |
520 | } |
521 | } |
522 | p++; |
523 | } |
524 | kDebug() << "Got pre-decoded:" << st; |
525 | QString result; |
526 | const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) ); |
527 | if ( !charsetcodec || forceCS ) { |
528 | charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) ); |
529 | } |
530 | |
531 | usedCS = charsetcodec->name(); |
532 | return charsetcodec->toUnicode( st ); |
533 | } |
534 | |
535 | QString decodeRFC2231String( const QByteArray &src ) |
536 | { |
537 | QByteArray usedCS; |
538 | return decodeRFC2231String( src, usedCS, "utf-8" , false ); |
539 | } |
540 | |
541 | QByteArray uniqueString() |
542 | { |
543 | static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" ; |
544 | time_t now; |
545 | char p[11]; |
546 | int pos, ran; |
547 | unsigned int timeval; |
548 | |
549 | p[10] = '\0'; |
550 | now = time( 0 ); |
551 | ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) ); |
552 | timeval = ( now / ran ) + getpid(); |
553 | |
554 | for ( int i = 0; i < 10; i++ ) { |
555 | pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) ); |
556 | //kDebug() << pos; |
557 | p[i] = chars[pos]; |
558 | } |
559 | |
560 | QByteArray ret; |
561 | ret.setNum( timeval ); |
562 | ret += '.'; |
563 | ret += p; |
564 | |
565 | return ret; |
566 | } |
567 | |
568 | QByteArray multiPartBoundary() |
569 | { |
570 | return "nextPart" + uniqueString(); |
571 | } |
572 | |
573 | QByteArray ( const QByteArray & ) |
574 | { |
575 | QByteArray result; |
576 | if ( header.isEmpty() ) { |
577 | return result; |
578 | } |
579 | |
580 | int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0; |
581 | while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) { |
582 | foldBegin = foldEnd = foldMid; |
583 | // find the first space before the line-break |
584 | while ( foldBegin > 0 ) { |
585 | if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) { |
586 | break; |
587 | } |
588 | --foldBegin; |
589 | } |
590 | // find the first non-space after the line-break |
591 | while ( foldEnd <= header.length() - 1 ) { |
592 | if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) { |
593 | ++foldEnd; |
594 | } else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' && |
595 | header[foldEnd] == '=' && foldEnd + 2 < header.length() && |
596 | ( ( header[foldEnd + 1] == '0' && |
597 | header[foldEnd + 2] == '9' ) || |
598 | ( header[foldEnd + 1] == '2' && |
599 | header[foldEnd + 2] == '0' ) ) ) { |
600 | // bug #86302: malformed header continuation starting with =09/=20 |
601 | foldEnd += 3; |
602 | } |
603 | else { |
604 | break; |
605 | } |
606 | } |
607 | |
608 | result += header.mid( pos, foldBegin - pos ); |
609 | if ( foldEnd < header.length() - 1 ) { |
610 | result += ' '; |
611 | } |
612 | pos = foldEnd; |
613 | } |
614 | const int len = header.length(); |
615 | if ( len > pos ) { |
616 | result += header.mid( pos, len - pos ); |
617 | } |
618 | return result; |
619 | } |
620 | |
621 | int ( const QByteArray &src, int &dataBegin, bool *folded ) |
622 | { |
623 | int end = dataBegin; |
624 | int len = src.length() - 1; |
625 | |
626 | if ( folded ) { |
627 | *folded = false; |
628 | } |
629 | |
630 | if ( dataBegin < 0 ) { |
631 | // Not found |
632 | return -1; |
633 | } |
634 | |
635 | if ( dataBegin > len ) { |
636 | // No data available |
637 | return len + 1; |
638 | } |
639 | |
640 | // If the first line contains nothing, but the next line starts with a space |
641 | // or a tab, that means a stupid mail client has made the first header field line |
642 | // entirely empty, and has folded the rest to the next line(s). |
643 | if ( src.at( end ) == '\n' && end + 1 < len && |
644 | ( src[end + 1] == ' ' || src[end + 1] == '\t' ) ) { |
645 | |
646 | // Skip \n and first whitespace |
647 | dataBegin += 2; |
648 | end += 2; |
649 | } |
650 | |
651 | if ( src.at( end ) != '\n' ) { // check if the header is not empty |
652 | while ( true ) { |
653 | end = src.indexOf( '\n', end + 1 ); |
654 | if ( end == -1 || end == len ) { |
655 | // end of string |
656 | break; |
657 | } else if ( src[end + 1] == ' ' || src[end + 1] == '\t' || |
658 | ( src[end + 1] == '=' && end + 3 <= len && |
659 | ( ( src[end + 2] == '0' && src[end + 3] == '9' ) || |
660 | ( src[end + 2] == '2' && src[end + 3] == '0' ) ) ) ) { |
661 | // next line is header continuation or starts with =09/=20 (bug #86302) |
662 | if ( folded ) { |
663 | *folded = true; |
664 | } |
665 | } else { |
666 | // end of header (no header continuation) |
667 | break; |
668 | } |
669 | } |
670 | } |
671 | |
672 | if ( end < 0 ) { |
673 | end = len + 1; //take the rest of the string |
674 | } |
675 | return end; |
676 | } |
677 | |
678 | int ( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded ) |
679 | { |
680 | QByteArray n = name; |
681 | n.append( ':' ); |
682 | int begin = -1; |
683 | |
684 | if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) { |
685 | begin = 0; |
686 | } else { |
687 | n.prepend( '\n' ); |
688 | const char *p = strcasestr( src.constData(), n.constData() ); |
689 | if ( !p ) { |
690 | begin = -1; |
691 | } else { |
692 | begin = p - src.constData(); |
693 | ++begin; |
694 | } |
695 | } |
696 | |
697 | if ( begin > -1 ) { //there is a header with the given name |
698 | dataBegin = begin + name.length() + 1; //skip the name |
699 | // skip the usual space after the colon |
700 | if ( src.at( dataBegin ) == ' ' ) { |
701 | ++dataBegin; |
702 | } |
703 | end = findHeaderLineEnd( src, dataBegin, folded ); |
704 | return begin; |
705 | |
706 | } else { |
707 | end = -1; |
708 | dataBegin = -1; |
709 | return -1; //header not found |
710 | } |
711 | } |
712 | |
713 | QByteArray ( const QByteArray &src, const QByteArray &name ) |
714 | { |
715 | int begin, end; |
716 | bool folded; |
717 | QByteArray result; |
718 | |
719 | if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) { |
720 | return result; |
721 | } |
722 | |
723 | if ( begin >= 0 ) { |
724 | if ( !folded ) { |
725 | result = src.mid( begin, end - begin ); |
726 | } else { |
727 | if ( end > begin ) { |
728 | QByteArray hdrValue = src.mid( begin, end - begin ); |
729 | result = unfoldHeader( hdrValue ); |
730 | } |
731 | } |
732 | } |
733 | return result; |
734 | } |
735 | |
736 | QList<QByteArray> ( const QByteArray &src, const QByteArray &name ) |
737 | { |
738 | int begin, end; |
739 | bool folded; |
740 | QList<QByteArray> result; |
741 | QByteArray copySrc( src ); |
742 | |
743 | if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) { |
744 | return result; |
745 | } |
746 | |
747 | while ( begin >= 0 ) { |
748 | if ( !folded ) { |
749 | result.append( copySrc.mid( begin, end - begin ) ); |
750 | } else { |
751 | QByteArray hdrValue = copySrc.mid( begin, end - begin ); |
752 | result.append( unfoldHeader( hdrValue ) ); |
753 | } |
754 | |
755 | // get the next one, a tiny bit ugly, but we don't want the previous to be found again... |
756 | copySrc = copySrc.mid( end ); |
757 | if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) { |
758 | break; |
759 | } |
760 | } |
761 | return result; |
762 | } |
763 | |
764 | void ( QByteArray &, const QByteArray &name ) |
765 | { |
766 | int begin, end, dummy; |
767 | begin = indexOfHeader( header, name, end, dummy ); |
768 | if ( begin >= 0 ) { |
769 | header.remove( begin, end - begin + 1 ); |
770 | } |
771 | } |
772 | |
773 | QByteArray CRLFtoLF( const QByteArray &s ) |
774 | { |
775 | QByteArray ret = s; |
776 | ret.replace( "\r\n" , "\n" ); |
777 | return ret; |
778 | } |
779 | |
780 | QByteArray CRLFtoLF( const char *s ) |
781 | { |
782 | QByteArray ret = s; |
783 | return CRLFtoLF( ret ); |
784 | } |
785 | |
786 | QByteArray LFtoCRLF( const QByteArray &s ) |
787 | { |
788 | QByteArray ret = s; |
789 | ret.replace( '\n', "\r\n" ); |
790 | return ret; |
791 | } |
792 | |
793 | QByteArray LFtoCRLF( const char *s ) |
794 | { |
795 | QByteArray ret = s; |
796 | return LFtoCRLF( ret ); |
797 | } |
798 | |
namespace {
/**
  Strips the double quotes from @p str in place. Inside a quoted section a
  backslash is removed as well, and the character it escaped is kept as is.
  Backslashes outside of quotes are not touched.

  StringType has to provide length(), operator[] and remove( pos, count );
  CharType has to be constructible from a char and comparable with the
  string's elements.
*/
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  bool insideQuotes = false;
  int idx = 0;
  while ( idx < str.length() ) {
    if ( str[idx] == CharType( '"' ) ) {
      str.remove( idx, 1 );
      insideQuotes = !insideQuotes;
      continue; // look at the character that moved into this position
    }
    if ( insideQuotes && ( str[idx] == CharType( '\\' ) ) ) {
      // drop the backslash; the increment below then skips the escaped char
      str.remove( idx, 1 );
    }
    ++idx;
  }
}
}
816 | |
817 | void removeQuots( QByteArray &str ) |
818 | { |
819 | removeQuotesGeneric<QByteArray, char>( str ); |
820 | } |
821 | |
822 | void removeQuots( QString &str ) |
823 | { |
824 | removeQuotesGeneric<QString, QLatin1Char>( str ); |
825 | } |
826 | |
827 | template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString> |
828 | void addQuotes_impl( StringType &str, bool forceQuotes ) |
829 | { |
830 | bool needsQuotes=false; |
831 | for ( int i=0; i < str.length(); i++ ) { |
832 | const CharType cur = str.at( i ); |
833 | if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) { |
834 | needsQuotes = true; |
835 | } |
836 | if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) { |
837 | str.insert( i, CharConverterType( '\\' ) ); |
838 | i++; |
839 | } |
840 | } |
841 | |
842 | if ( needsQuotes || forceQuotes ) { |
843 | str.insert( 0, CharConverterType( '\"' ) ); |
844 | str.append( StringConverterType( "\"" ) ); |
845 | } |
846 | } |
847 | |
848 | void addQuotes( QByteArray &str, bool forceQuotes ) |
849 | { |
850 | addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes ); |
851 | } |
852 | |
853 | void addQuotes( QString &str, bool forceQuotes ) |
854 | { |
855 | addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes ); |
856 | } |
857 | |
858 | KMIME_EXPORT QString balanceBidiState( const QString &input ) |
859 | { |
860 | const int LRO = 0x202D; |
861 | const int RLO = 0x202E; |
862 | const int LRE = 0x202A; |
863 | const int RLE = 0x202B; |
864 | const int PDF = 0x202C; |
865 | |
866 | QString result = input; |
867 | |
868 | int openDirChangers = 0; |
869 | int numPDFsRemoved = 0; |
870 | for ( int i = 0; i < input.length(); i++ ) { |
871 | const ushort &code = input.at( i ).unicode(); |
872 | if ( code == LRO || code == RLO || code == LRE || code == RLE ) { |
873 | openDirChangers++; |
874 | } else if ( code == PDF ) { |
875 | if ( openDirChangers > 0 ) { |
876 | openDirChangers--; |
877 | } else { |
878 | // One PDF too much, remove it |
879 | kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input; |
880 | result.remove( i - numPDFsRemoved, 1 ); |
881 | numPDFsRemoved++; |
882 | } |
883 | } |
884 | } |
885 | |
886 | if ( openDirChangers > 0 ) { |
887 | kWarning() << "Possible Unicode spoofing detected in" << input; |
888 | |
889 | // At PDF chars to the end until the correct state is restored. |
890 | // As a special exception, when encountering quoted strings, place the PDF before |
891 | // the last quote. |
892 | for ( int i = openDirChangers; i > 0; i-- ) { |
893 | if ( result.endsWith( QLatin1Char( '"' ) ) ) { |
894 | result.insert( result.length() - 1, QChar( PDF ) ); |
895 | } else { |
896 | result += QChar( PDF ); |
897 | } |
898 | } |
899 | } |
900 | |
901 | return result; |
902 | } |
903 | |
904 | QString removeBidiControlChars( const QString &input ) |
905 | { |
906 | const int LRO = 0x202D; |
907 | const int RLO = 0x202E; |
908 | const int LRE = 0x202A; |
909 | const int RLE = 0x202B; |
910 | QString result = input; |
911 | result.remove( LRO ); |
912 | result.remove( RLO ); |
913 | result.remove( LRE ); |
914 | result.remove( RLE ); |
915 | return result; |
916 | } |
917 | |
918 | static bool isCryptoPart( Content* content ) |
919 | { |
920 | if ( !content->contentType( false ) ) { |
921 | return false; |
922 | } |
923 | |
924 | if ( content->contentType()->subType().toLower() == "octet-stream" && |
925 | !content->contentDisposition( false ) ) { |
926 | return false; |
927 | } |
928 | |
929 | const Headers::ContentType *contentType = content->contentType(); |
930 | const QByteArray lowerSubType = contentType->subType().toLower(); |
931 | return ( contentType->mediaType().toLower() == "application" && |
932 | ( lowerSubType == "pgp-encrypted" || |
933 | lowerSubType == "pgp-signature" || |
934 | lowerSubType == "pkcs7-mime" || |
935 | lowerSubType == "pkcs7-signature" || |
936 | lowerSubType == "x-pkcs7-signature" || |
937 | ( lowerSubType == "octet-stream" && |
938 | content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) ); |
939 | } |
940 | |
941 | bool hasAttachment( Content* content ) |
942 | { |
943 | if ( !content ) { |
944 | return false; |
945 | } |
946 | |
947 | bool emptyFilename = true; |
948 | if ( content->contentDisposition( false ) && |
949 | !content->contentDisposition()->filename().isEmpty() ) { |
950 | emptyFilename = false; |
951 | } |
952 | |
953 | if ( emptyFilename && |
954 | content->contentType( false ) && |
955 | !content->contentType()->name().isEmpty() ) { |
956 | emptyFilename = false; |
957 | } |
958 | |
959 | // ignore crypto parts |
960 | if ( !emptyFilename && !isCryptoPart( content ) ) { |
961 | return true; |
962 | } |
963 | |
964 | // Ok, content itself is not an attachment. now we deal with multiparts |
965 | if ( content->contentType()->isMultipart() ) { |
966 | Q_FOREACH ( Content *child, content->contents() ) { |
967 | if ( hasAttachment( child ) ) { |
968 | return true; |
969 | } |
970 | } |
971 | } |
972 | return false; |
973 | } |
974 | |
975 | bool isSigned( Message *message ) |
976 | { |
977 | if ( !message ) { |
978 | return false; |
979 | } |
980 | |
981 | const KMime::Headers::ContentType* const contentType = message->contentType(); |
982 | if ( contentType->isSubtype( "signed" ) || |
983 | contentType->isSubtype( "pgp-signature" ) || |
984 | contentType->isSubtype( "pkcs7-signature" ) || |
985 | contentType->isSubtype( "x-pkcs7-signature" ) || |
986 | message->mainBodyPart( "multipart/signed" ) || |
987 | message->mainBodyPart( "application/pgp-signature" ) || |
988 | message->mainBodyPart( "application/pkcs7-signature" ) || |
989 | message->mainBodyPart( "application/x-pkcs7-signature" ) ) { |
990 | return true; |
991 | } |
992 | return false; |
993 | } |
994 | |
995 | bool isEncrypted( Message *message ) |
996 | { |
997 | if ( !message ) { |
998 | return false; |
999 | } |
1000 | |
1001 | const KMime::Headers::ContentType* const contentType = message->contentType(); |
1002 | if ( contentType->isSubtype( "encrypted" ) || |
1003 | contentType->isSubtype( "pgp-encrypted" ) || |
1004 | contentType->isSubtype( "pkcs7-mime" ) || |
1005 | message->mainBodyPart( "multipart/encrypted" ) || |
1006 | message->mainBodyPart( "application/pgp-encrypted" ) || |
1007 | message->mainBodyPart( "application/pkcs7-mime" ) ) { |
1008 | return true; |
1009 | } |
1010 | |
1011 | return false; |
1012 | } |
1013 | |
1014 | bool isInvitation( Content *content ) |
1015 | { |
1016 | if ( !content ) { |
1017 | return false; |
1018 | } |
1019 | |
1020 | const KMime::Headers::ContentType* const contentType = content->contentType( false ); |
1021 | |
1022 | if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) ) { |
1023 | return true; |
1024 | } |
1025 | |
1026 | return false; |
1027 | } |
1028 | |
1029 | } // namespace KMime |
1030 | |