1/**********************************************************************
2 *
3 * rfccodecs.cpp - handler for various rfc/mime encodings
4 * Copyright (C) 2000 s.carstens@gmx.de
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 *********************************************************************/
22/**
23 * @file
24 * This file is part of the IMAP support library and defines the
25 * RfcCodecs class.
26 *
27 * @brief
28 * Defines the RfcCodecs class.
29 *
30 * @author Sven Carstens
31 */
32
33#include "rfccodecs.h"
34
35#include <ctype.h>
36#include <sys/types.h>
37
38#include <stdio.h>
39#include <stdlib.h>
40
41#include <QtCore/QTextCodec>
42#include <QtCore/QBuffer>
43#include <QtCore/QRegExp>
44#include <QtCore/QByteArray>
45#include <QtCore/QLatin1Char>
46#include <kcodecs.h>
47
48using namespace KIMAP;
49
50// This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
51// adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
52
53//@cond PRIVATE
/* Alphabet of the modified base64 used inside IMAP folder names: the position
   of a character in this table is its 6-bit value. Compared with standard
   base64 the last entry is ',' instead of '/'. */
static const unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/* Value stored in the decode table for bytes that are not part of base64chars. */
#define UNDEFINED 64
/* NOTE(review): not referenced by any code visible in this file. */
#define MAXLINE 76
/* The 16 characters (plus terminating NUL) that the RFC 2047 and RFC 2231
   encoders in this file always write in escaped form. */
static const char especials[17] = "()<>@,;:\"/[]?.= ";

/* UTF16 definitions */
/* Low ten bits of a surrogate carry payload. */
#define UTF16MASK 0x03FFUL
/* Number of payload bits per surrogate. */
#define UTF16SHIFT 10
/* First code point that needs a surrogate pair. */
#define UTF16BASE 0x10000UL
/* Inclusive range of high (leading) surrogates. */
#define UTF16HIGHSTART 0xD800UL
#define UTF16HIGHEND 0xDBFFUL
/* Inclusive range of low (trailing) surrogates. */
#define UTF16LOSTART 0xDC00UL
#define UTF16LOEND 0xDFFFUL
68//@endcond
69
70//-----------------------------------------------------------------------------
71QByteArray KIMAP::decodeImapFolderName( const QByteArray &inSrc )
72{
73 unsigned char c, i, bitcount;
74 unsigned long ucs4, utf16, bitbuf;
75 unsigned char base64[256], utf8[6];
76 unsigned int srcPtr = 0;
77 QByteArray dst;
78 QByteArray src = inSrc;
79 uint srcLen = inSrc.length();
80
81 /* initialize modified base64 decoding table */
82 memset( base64, UNDEFINED, sizeof( base64 ) );
83 for ( i = 0; i < sizeof( base64chars ); ++i ) {
84 base64[(int)base64chars[i]] = i;
85 }
86
87 /* loop until end of string */
88 while ( srcPtr < srcLen ) {
89 c = src[srcPtr++];
90 /* deal with literal characters and &- */
91 if ( c != '&' || src[srcPtr] == '-' ) {
92 /* encode literally */
93 dst += c;
94 /* skip over the '-' if this is an &- sequence */
95 if ( c == '&' ) {
96 srcPtr++;
97 }
98 } else {
99 /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
100 bitbuf = 0;
101 bitcount = 0;
102 ucs4 = 0;
103 while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) {
104 ++srcPtr;
105 bitbuf = ( bitbuf << 6 ) | c;
106 bitcount += 6;
107 /* enough bits for a UTF-16 character? */
108 if ( bitcount >= 16 ) {
109 bitcount -= 16;
110 utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;
111 /* convert UTF16 to UCS4 */
112 if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
113 ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
114 continue;
115 } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
116 ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
117 } else {
118 ucs4 = utf16;
119 }
120 /* convert UTF-16 range of UCS4 to UTF-8 */
121 if ( ucs4 <= 0x7fUL ) {
122 utf8[0] = ucs4;
123 i = 1;
124 } else if ( ucs4 <= 0x7ffUL ) {
125 utf8[0] = 0xc0 | ( ucs4 >> 6 );
126 utf8[1] = 0x80 | ( ucs4 & 0x3f );
127 i = 2;
128 } else if ( ucs4 <= 0xffffUL ) {
129 utf8[0] = 0xe0 | ( ucs4 >> 12 );
130 utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
131 utf8[2] = 0x80 | ( ucs4 & 0x3f );
132 i = 3;
133 } else {
134 utf8[0] = 0xf0 | ( ucs4 >> 18 );
135 utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
136 utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
137 utf8[3] = 0x80 | ( ucs4 & 0x3f );
138 i = 4;
139 }
140 /* copy it */
141 for ( c = 0; c < i; ++c ) {
142 dst += utf8[c];
143 }
144 }
145 }
146 /* skip over trailing '-' in modified UTF-7 encoding */
147 if ( src[srcPtr] == '-' ) {
148 ++srcPtr;
149 }
150 }
151 }
152 return dst;
153}
154
155QString KIMAP::decodeImapFolderName( const QString &inSrc )
156{
157 return QString::fromUtf8( decodeImapFolderName( inSrc.toUtf8() ).data() );
158}
159
160//-----------------------------------------------------------------------------
161
162QByteArray KIMAP::quoteIMAP( const QByteArray &src )
163{
164 uint len = src.length();
165 QByteArray result;
166 result.reserve( 2 * len );
167 for ( unsigned int i = 0; i < len; i++ ) {
168 if ( src[i] == '"' || src[i] == '\\' ) {
169 result += '\\';
170 }
171 result += src[i];
172 }
173 result.squeeze();
174 return result;
175}
176
177QString KIMAP::quoteIMAP( const QString &src )
178{
179 uint len = src.length();
180 QString result;
181 result.reserve( 2 * len );
182 for ( unsigned int i = 0; i < len; i++ ) {
183 if ( src[i] == QLatin1Char('"') || src[i] == QLatin1Char('\\') ) {
184 result += QLatin1Char('\\');
185 }
186 result += src[i];
187 }
188 //result.squeeze(); - unnecessary and slow
189 return result;
190}
191
192//-----------------------------------------------------------------------------
193QString KIMAP::encodeImapFolderName( const QString &inSrc )
194{
195 return QString::fromUtf8( encodeImapFolderName( inSrc.toUtf8() ).data() );
196}
197
198QByteArray KIMAP::encodeImapFolderName( const QByteArray &inSrc )
199{
200 unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
201 unsigned int ucs4, bitbuf;
202 QByteArray src = inSrc;
203 QByteArray dst;
204
205 int srcPtr = 0;
206 utf7mode = 0;
207 utf8total = 0;
208 bitstogo = 0;
209 utf8pos = 0;
210 bitbuf = 0;
211 ucs4 = 0;
212 while ( srcPtr < src.length () ) {
213 c = (unsigned char)src[srcPtr++];
214 /* normal character? */
215 if ( c >= ' ' && c <= '~' ) {
216 /* switch out of UTF-7 mode */
217 if ( utf7mode ) {
218 if ( bitstogo ) {
219 dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
220 bitstogo = 0;
221 }
222 dst += '-';
223 utf7mode = 0;
224 }
225 dst += c;
226 /* encode '&' as '&-' */
227 if ( c == '&' ) {
228 dst += '-';
229 }
230 continue;
231 }
232 /* switch to UTF-7 mode */
233 if ( !utf7mode ) {
234 dst += '&';
235 utf7mode = 1;
236 }
237 /* Encode US-ASCII characters as themselves */
238 if ( c < 0x80 ) {
239 ucs4 = c;
240 utf8total = 1;
241 } else if ( utf8total ) {
242 /* save UTF8 bits into UCS4 */
243 ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
244 if ( ++utf8pos < utf8total ) {
245 continue;
246 }
247 } else {
248 utf8pos = 1;
249 if ( c < 0xE0 ) {
250 utf8total = 2;
251 ucs4 = c & 0x1F;
252 } else if ( c < 0xF0 ) {
253 utf8total = 3;
254 ucs4 = c & 0x0F;
255 } else {
256 /* NOTE: can't convert UTF8 sequences longer than 4 */
257 utf8total = 4;
258 ucs4 = c & 0x03;
259 }
260 continue;
261 }
262 /* loop to split ucs4 into two utf16 chars if necessary */
263 utf8total = 0;
264 do
265 {
266 if ( ucs4 >= UTF16BASE ) {
267 ucs4 -= UTF16BASE;
268 bitbuf =
269 ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
270 ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
271 utf16flag = 1;
272 } else {
273 bitbuf = ( bitbuf << 16 ) | ucs4;
274 utf16flag = 0;
275 }
276 bitstogo += 16;
277 /* spew out base64 */
278 while ( bitstogo >= 6 ) {
279 bitstogo -= 6;
280 dst +=
281 base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
282 }
283 }
284 while ( utf16flag );
285 }
286 /* if in UTF-7 mode, finish in ASCII */
287 if ( utf7mode ) {
288 if ( bitstogo ) {
289 dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
290 }
291 dst += '-';
292 }
293 return quoteIMAP( dst );
294}
295
296//-----------------------------------------------------------------------------
297QTextCodec *KIMAP::codecForName( const QString &str )
298{
299 if ( str.isEmpty () ) {
300 return 0;
301 }
302 return QTextCodec::codecForName ( str.toLower ().
303 replace ( QLatin1String("windows"), QLatin1String("cp") ).toLatin1 () );
304}
305
306//-----------------------------------------------------------------------------
307const QString KIMAP::decodeRFC2047String( const QString &str )
308{
309 QString throw_away;
310
311 return decodeRFC2047String( str, throw_away );
312}
313
314//-----------------------------------------------------------------------------
315const QString KIMAP::decodeRFC2047String( const QString &str,
316 QString &charset )
317{
318 QString throw_away;
319
320 return decodeRFC2047String( str, charset, throw_away );
321}
322
323//-----------------------------------------------------------------------------
324const QString KIMAP::decodeRFC2047String( const QString &str,
325 QString &charset,
326 QString &language )
327{
328 //do we have a rfc string
329 if ( !str.contains( QLatin1String("=?") ) ) {
330 return str;
331 }
332
333 // FIXME get rid of the conversion?
334 QByteArray aStr = str.toLatin1 (); // QString.length() means Unicode chars
335 QByteArray result;
336 char *pos, *beg, *end, *mid = 0;
337 QByteArray cstr;
338 char encoding = 0, ch;
339 bool valid;
340 const int maxLen = 200;
341 int i;
342
343// result.truncate(aStr.length());
344 for ( pos = aStr.data (); *pos; pos++ ) {
345 if ( pos[0] != '=' || pos[1] != '?' ) {
346 result += *pos;
347 continue;
348 }
349 beg = pos + 2;
350 end = beg;
351 valid = true;
352 // parse charset name
353 for ( i = 2, pos += 2;
354 i < maxLen &&
355 ( *pos != '?' && ( ispunct( *pos ) || isalnum ( *pos ) ) );
356 i++ ) {
357 pos++;
358 }
359 if ( *pos != '?' || i < 4 || i >= maxLen ) {
360 valid = false;
361 } else {
362 charset = QLatin1String(QByteArray( beg, i - 1 )); // -2 + 1 for the zero
363 int pt = charset.lastIndexOf( QLatin1Char('*') );
364 if ( pt != -1 ) {
365 // save language for later usage
366 language = charset.right( charset.length () - pt - 1 );
367
368 // tie off language as defined in rfc2047
369 charset.truncate( pt );
370 }
371 // get encoding and check delimiting question marks
372 encoding = toupper( pos[1] );
373 if ( pos[2] != '?' ||
374 ( encoding != 'Q' && encoding != 'B' &&
375 encoding != 'q' && encoding != 'b' ) ) {
376 valid = false;
377 }
378 pos += 3;
379 i += 3;
380// kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
381 }
382 if ( valid ) {
383 mid = pos;
384 // search for end of encoded part
385 while ( i < maxLen && *pos && !( *pos == '?' && *( pos + 1 ) == '=' ) ) {
386 i++;
387 pos++;
388 }
389 end = pos + 2;//end now points to the first char after the encoded string
390 if ( i >= maxLen || !*pos ) {
391 valid = false;
392 }
393 }
394 if ( valid ) {
395 ch = *pos;
396 *pos = '\0';
397 cstr = QByteArray (mid).left( (int)( mid - pos - 1 ) );
398 if ( encoding == 'Q' ) {
399 // decode quoted printable text
400 for ( i = cstr.length () - 1; i >= 0; --i ) {
401 if ( cstr[i] == '_' ) {
402 cstr[i] = ' ';
403 }
404 }
405// kDebug() << "before QP '"
406// << cstr << "'";
407 cstr = KCodecs::quotedPrintableDecode( cstr );
408// kDebug() << "after QP '"
409// << cstr << "'";
410 } else {
411 // decode base64 text
412 cstr = QByteArray::fromBase64( cstr );
413 }
414 *pos = ch;
415 int len = cstr.length();
416 for ( i = 0; i < len; ++i ) {
417 result += cstr[i];
418 }
419
420 pos = end - 1;
421 } else {
422// kDebug() << "invalid";
423 //result += "=?";
424 //pos = beg -1; // because pos gets increased shortly afterwards
425 pos = beg - 2;
426 result += *pos++;
427 result += *pos;
428 }
429 }
430 if ( !charset.isEmpty () ) {
431 QTextCodec *aCodec = codecForName( QLatin1String(charset.toLatin1 ()) );
432 if ( aCodec ) {
433// kDebug() << "Codec is" << aCodec->name();
434 return aCodec->toUnicode( result );
435 }
436 }
437 return QLatin1String(result);
438}
439
440//-----------------------------------------------------------------------------
441const QString KIMAP::encodeRFC2047String( const QString &str )
442{
443 return QLatin1String(encodeRFC2047String( str.toLatin1() ));
444}
445
446//-----------------------------------------------------------------------------
447const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str )
448{
449 if ( str.isEmpty () ) {
450 return str;
451 }
452
453 const signed char *latin =
454 reinterpret_cast<const signed char *>
455 ( str.data() ), *l, *start, *stop;
456 char hexcode;
457 int numQuotes, i;
458 int rptr = 0;
459 // My stats show this number results in 12 resize() out of 73,000
460 int resultLen = 3 * str.length() / 2;
461 QByteArray result( resultLen, '\0' );
462
463 while ( *latin ) {
464 l = latin;
465 start = latin;
466 while ( *l ) {
467 if ( *l == 32 ) {
468 start = l + 1;
469 }
470 if ( *l < 0 ) {
471 break;
472 }
473 l++;
474 }
475 if ( *l ) {
476 numQuotes = 1;
477 while ( *l ) {
478 /* The encoded word must be limited to 75 character */
479 for ( i = 0; i < 16; ++i ) {
480 if ( *l == especials[i] ) {
481 numQuotes++;
482 }
483 }
484 if ( *l < 0 ) {
485 numQuotes++;
486 }
487 /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
488 if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) {
489 break;
490 }
491 l++;
492 }
493 if ( *l ) {
494 stop = l - 1;
495 while ( stop >= start && *stop != 32 ) {
496 stop--;
497 }
498 if ( stop <= start ) {
499 stop = l;
500 }
501 } else {
502 stop = l;
503 }
504 if ( resultLen - rptr - 1 <= start - latin + 1 + 16 ) {
505 // =?iso-88...
506 resultLen += ( start - latin + 1 ) * 2 + 20; // more space
507 result.resize( resultLen );
508 }
509 while ( latin < start ) {
510 result[rptr++] = *latin;
511 latin++;
512 }
513 result.replace( rptr, 15, "=?iso-8859-1?q?" );
514 rptr += 15;
515 if ( resultLen - rptr - 1 <= 3 * ( stop - latin + 1 ) ) {
516 resultLen += ( stop - latin + 1 ) * 4 + 20; // more space
517 result.resize( resultLen );
518 }
519 while ( latin < stop ) {
520 // can add up to 3 chars/iteration
521 numQuotes = 0;
522 for ( i = 0; i < 16; ++i ) {
523 if ( *latin == especials[i] ) {
524 numQuotes = 1;
525 }
526 }
527 if ( *latin < 0 ) {
528 numQuotes = 1;
529 }
530 if ( numQuotes ) {
531 result[rptr++] = '=';
532 hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48;
533 if ( hexcode >= 58 ) {
534 hexcode += 7;
535 }
536 result[rptr++] = hexcode;
537 hexcode = ( *latin & 0x0F ) + 48;
538 if ( hexcode >= 58 ) {
539 hexcode += 7;
540 }
541 result[rptr++] = hexcode;
542 } else {
543 result[rptr++] = *latin;
544 }
545 latin++;
546 }
547 result[rptr++] = '?';
548 result[rptr++] = '=';
549 } else {
550 while ( *latin ) {
551 if ( rptr == resultLen - 1 ) {
552 resultLen += 30;
553 result.resize( resultLen );
554 }
555 result[rptr++] = *latin;
556 latin++;
557 }
558 }
559 }
560 result[rptr] = 0;
561 return result;
562}
563
564//-----------------------------------------------------------------------------
565const QString KIMAP::encodeRFC2231String( const QString &str )
566{
567 if ( str.isEmpty () ) {
568 return str;
569 }
570
571 signed char *latin = (signed char *)calloc( 1, str.length () + 1 );
572 char *latin_us = (char *)latin;
573 strcpy( latin_us, str.toLatin1 () );
574 signed char *l = latin;
575 char hexcode;
576 int i;
577 bool quote;
578 while ( *l ) {
579 if ( *l < 0 ) {
580 break;
581 }
582 l++;
583 }
584 if ( !*l ) {
585 free( latin );
586 return str;
587 }
588 QByteArray result;
589 l = latin;
590 while ( *l ) {
591 quote = *l < 0;
592 for ( i = 0; i < 16; ++i ) {
593 if ( *l == especials[i] ) {
594 quote = true;
595 }
596 }
597 if ( quote ) {
598 result += '%';
599 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
600 if ( hexcode >= 58 ) {
601 hexcode += 7;
602 }
603 result += hexcode;
604 hexcode = ( *l & 0x0F ) + 48;
605 if ( hexcode >= 58 ) {
606 hexcode += 7;
607 }
608 result += hexcode;
609 } else {
610 result += *l;
611 }
612 l++;
613 }
614 free( latin );
615 return QLatin1String(result);
616}
617
618//-----------------------------------------------------------------------------
619const QString KIMAP::decodeRFC2231String( const QString &str )
620{
621 int p = str.indexOf ( QLatin1Char('\'') );
622
623 //see if it is an rfc string
624 if ( p < 0 ) {
625 return str;
626 }
627
628 int l = str.lastIndexOf( QLatin1Char('\'') );
629
630 //second is language
631 if ( p >= l ) {
632 return str;
633 }
634
635 //first is charset or empty
636 //QString charset = str.left ( p );
637 QString st = str.mid ( l + 1 );
638 //QString language = str.mid ( p + 1, l - p - 1 );
639
640 //kDebug() << "Charset:" << charset << "Language:" << language;
641
642 char ch, ch2;
643 p = 0;
644 while ( p < (int) st.length () ) {
645 if ( st.at( p ) == 37 ) {
646 ch = st.at( p + 1 ).toLatin1 () - 48;
647 if ( ch > 16 ) {
648 ch -= 7;
649 }
650 ch2 = st.at( p + 2 ).toLatin1 () - 48;
651 if ( ch2 > 16 ) {
652 ch2 -= 7;
653 }
654 st.replace( p, 1, ch * 16 + ch2 );
655 st.remove ( p + 1, 2 );
656 }
657 p++;
658 }
659 return st;
660}
661