1/*
2 kmime_parsers.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 Copyright (c) 2001 the KMime authors.
6 See file AUTHORS for details
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public License
19 along with this library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
22*/
23#include "kmime_parsers.h"
24
25#include <QtCore/QRegExp>
26#include <QtCore/QByteArray>
27
28using namespace KMime::Parser;
29
30namespace KMime {
31namespace Parser {
32
33MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary )
34{
35 s_rc=src;
36 b_oundary=boundary;
37}
38
39bool MultiPart::parse()
40{
41 QByteArray b = "--" + b_oundary, part;
42 int pos1=0, pos2=0, blen=b.length();
43
44 p_arts.clear();
45
46 //find the first valid boundary
47 while ( 1 ) {
48 if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 ||
49 s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all
50 break;
51 }
52 pos1 += blen; //boundary found but not valid => skip it;
53 }
54
55 if ( pos1 > -1 ) {
56 pos1 += blen;
57 if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) {
58 // the only valid boundary is the end-boundary
59 // this message is *really* broken
60 pos1 = -1; //we give up
61 } else if ( ( pos1 - blen ) > 1 ) { //preamble present
62 p_reamble = s_rc.left( pos1 - blen - 1 );
63 }
64 }
65
66 while ( pos1 > -1 && pos2 > -1 ) {
67
68 //skip the rest of the line for the first boundary - the message-part starts here
69 if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) {
70 //now search the next linebreak
71 //now find the next valid boundary
72 pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
73 while ( 1 ) {
74 if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 ||
75 s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found
76 break;
77 }
78 pos2 += blen; //boundary is invalid => skip it;
79 }
80
81 if ( pos2 == -1 ) { // no more boundaries found
82 part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string
83 p_arts.append( part );
84 pos1 = -1;
85 pos2 = -1; //break;
86 } else {
87 part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
88 p_arts.append( part );
89 pos2 += blen; //pos2 points now to the first character after the boundary
90 if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary
91 pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
92
93 if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line
94 //everything after the end-boundary is considered as the epilouge
95 e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 );
96 }
97 pos1 = -1;
98 pos2 = -1; //break
99 } else {
100 pos1 = pos2; //the search continues ...
101 }
102 }
103 }
104 }
105
106 return !p_arts.isEmpty();
107}
108
109//=============================================================================
110
111NonMimeParser::NonMimeParser( const QByteArray &src ) :
112 s_rc( src ), p_artNr( -1 ), t_otalNr( -1 )
113{
114}
115
116/**
117 * try to guess the mimetype from the file-extension
118 */
119QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName )
120{
121 QByteArray tmp, mimeType;
122 int pos;
123
124 if ( !fileName.isEmpty() ) {
125 pos = fileName.lastIndexOf( '.' );
126 if ( pos++ != -1 ) {
127 tmp = fileName.mid( pos, fileName.length() - pos ).toUpper();
128 if ( tmp == "JPG" || tmp == "JPEG" ) {
129 mimeType = "image/jpeg";
130 } else if ( tmp == "GIF" ) {
131 mimeType = "image/gif";
132 } else if ( tmp == "PNG" ) {
133 mimeType = "image/png";
134 } else if ( tmp == "TIFF" || tmp == "TIF" ) {
135 mimeType = "image/tiff";
136 } else if ( tmp == "XPM" ) {
137 mimeType = "image/x-xpixmap";
138 } else if ( tmp == "XBM" ) {
139 mimeType = "image/x-xbitmap";
140 } else if ( tmp == "BMP" ) {
141 mimeType = "image/bmp";
142 } else if ( tmp == "TXT" ||
143 tmp == "ASC" ||
144 tmp == "H" ||
145 tmp == "C" ||
146 tmp == "CC" ||
147 tmp == "CPP" ) {
148 mimeType = "text/plain";
149 } else if ( tmp == "HTML" || tmp == "HTM" ) {
150 mimeType = "text/html";
151 } else {
152 mimeType = "application/octet-stream";
153 }
154 } else {
155 mimeType = "application/octet-stream";
156 }
157 } else {
158 mimeType = "application/octet-stream";
159 }
160
161 return mimeType;
162}
163
164//==============================================================================
165
166UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) :
167 NonMimeParser( src ), s_ubject( subject )
168{}
169
170bool UUEncoded::parse()
171{
172 int currentPos=0;
173 bool success=true, firstIteration=true;
174
175 while ( success ) {
176 int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
177 bool containsBegin=false, containsEnd=false;
178 QByteArray tmp, fileName;
179
180 if ( ( beginPos = QString::fromLatin1( s_rc ).indexOf( QRegExp( QLatin1String( "begin [0-9][0-9][0-9]" ) ),
181 currentPos ) ) > -1 &&
182 ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) {
183 containsBegin = true;
184 uuStart = s_rc.indexOf( '\n', beginPos );
185 if ( uuStart == -1 ) {//no more line breaks found, we give up
186 success = false;
187 break;
188 } else {
189 uuStart++; //points now at the beginning of the next line
190 }
191 } else {
192 beginPos=currentPos;
193 }
194
195 if ( ( endPos = s_rc.indexOf( "\nend", ( uuStart > 0 ) ? uuStart - 1 : 0 ) ) == -1 ) {
196 endPos = s_rc.length(); //no end found
197 } else {
198 containsEnd = true;
199 }
200
201 if ( ( containsBegin && containsEnd ) || firstIteration ) {
202
203 //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
204 //all lines in a uuencoded text start with 'M'
205 for ( int idx=uuStart; idx<endPos; idx++ ) {
206 if ( s_rc[idx] == '\n' ) {
207 lineCount++;
208 if ( idx + 1 < endPos && s_rc[idx + 1] == 'M' ) {
209 idx++;
210 MCount++;
211 }
212 }
213 }
214
215 //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
216 if ( MCount == 0 || ( lineCount - MCount ) > 10 ||
217 ( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) {
218 // harder check for split-articles
219 success = false;
220 break; //too many "non-M-Lines" found, we give up
221 }
222
223 if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) {
224 // message may be split up => parse subject
225 QRegExp rx( QLatin1String( "[0-9]+/[0-9]+" ) );
226 pos = rx.indexIn( QLatin1String( s_ubject ), 0 );
227 len = rx.matchedLength();
228 if ( pos != -1 ) {
229 tmp = s_ubject.mid( pos, len );
230 pos = tmp.indexOf( '/' );
231 p_artNr = tmp.left( pos ).toInt();
232 t_otalNr = tmp.right( tmp.length() - pos - 1 ).toInt();
233 } else {
234 success = false;
235 break; //no "part-numbers" found in the subject, we give up
236 }
237 }
238
239 //everything before "begin" is text
240 if ( beginPos > 0 ) {
241 t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
242 }
243
244 if ( containsBegin ) {
245 //everything between "begin ### " and the next LF is considered as the filename
246 fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 );
247 } else {
248 fileName = "";
249 }
250 f_ilenames.append( fileName );
251 //everything beetween "begin" and "end" is uuencoded
252 b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) );
253 m_imeTypes.append( guessMimeType( fileName ) );
254 firstIteration = false;
255
256 int next = s_rc.indexOf( '\n', endPos + 1 );
257 if ( next == -1 ) { //no more line breaks found, we give up
258 success = false;
259 break;
260 } else {
261 next++; //points now at the beginning of the next line
262 }
263 currentPos = next;
264
265 } else {
266 success = false;
267 }
268 }
269
270 // append trailing text part of the article
271 t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
272
273 return ( ( b_ins.count() > 0 ) || isPartial() );
274}
275
276//==============================================================================
277
278YENCEncoded::YENCEncoded( const QByteArray &src ) :
279 NonMimeParser( src )
280{
281}
282
283bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value )
284{
285 bool found = false;
286 QByteArray sought=name + '=';
287
288 int iPos = src.indexOf( sought );
289 if ( iPos > -1 ) {
290 int pos1 = src.indexOf( ' ', iPos );
291 int pos2 = src.indexOf( '\r', iPos );
292 int pos3 = src.indexOf( '\t', iPos );
293 int pos4 = src.indexOf( '\n', iPos );
294 if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) {
295 pos1 = pos2;
296 }
297 if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) {
298 pos1 = pos3;
299 }
300 if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) {
301 pos1 = pos4;
302 }
303 iPos=src.lastIndexOf( '=', pos1 ) + 1;
304 if ( iPos < pos1 ) {
305 char c = src.at( iPos );
306 if ( c>='0' && c<='9' ) {
307 found = true;
308 *value = src.mid( iPos, pos1 - iPos ).toInt();
309 }
310 }
311 }
312 return found;
313}
314
315bool YENCEncoded::parse()
316{
317 int currentPos=0;
318 bool success=true;
319
320 while ( success ) {
321 int beginPos=currentPos, yencStart=currentPos;
322 bool containsPart=false;
323 QByteArray fileName, mimeType;
324
325 if ( ( beginPos = s_rc.indexOf( "=ybegin ", currentPos ) ) > -1 &&
326 ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) {
327 yencStart = s_rc.indexOf( '\n', beginPos );
328 if ( yencStart == -1 ) { // no more line breaks found, give up
329 success = false;
330 break;
331 } else {
332 yencStart++;
333 if ( s_rc.indexOf( "=ypart", yencStart ) == yencStart ) {
334 containsPart = true;
335 yencStart = s_rc.indexOf( '\n', yencStart );
336 if ( yencStart == -1 ) {
337 success = false;
338 break;
339 }
340 yencStart++;
341 }
342 }
343 // Try to identify yenc meta data
344
345 // Filenames can contain any embedded chars until end of line
346 QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos );
347 int namePos = meta.indexOf( "name=" );
348 if ( namePos == -1 ) {
349 success = false;
350 break;
351 }
352 int eolPos = meta.indexOf( '\r', namePos );
353 if ( eolPos == -1 ) {
354 eolPos = meta.indexOf( '\n', namePos );
355 }
356 if ( eolPos == -1 ) {
357 success = false;
358 break;
359 }
360 fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) );
361
362 // Other metadata is integer
363 int yencLine;
364 if ( !yencMeta( meta, "line", &yencLine ) ) {
365 success = false;
366 break;
367 }
368 int yencSize;
369 if ( !yencMeta( meta, "size", &yencSize ) ) {
370 success = false;
371 break;
372 }
373
374 int partBegin, partEnd;
375 if ( containsPart ) {
376 if ( !yencMeta( meta, "part", &p_artNr ) ) {
377 success = false;
378 break;
379 }
380 if ( !yencMeta( meta, "begin", &partBegin ) ||
381 !yencMeta( meta, "end", &partEnd ) ) {
382 success = false;
383 break;
384 }
385 if ( !yencMeta( meta, "total", &t_otalNr ) ) {
386 t_otalNr = p_artNr + 1;
387 }
388 if ( yencSize == partEnd - partBegin + 1 ) {
389 t_otalNr = 1;
390 } else {
391 yencSize = partEnd - partBegin + 1;
392 }
393 }
394
395 // We have a valid yenc header; now we extract the binary data
396 int totalSize = 0;
397 int pos = yencStart;
398 int len = s_rc.length();
399 bool lineStart = true;
400 int lineLength = 0;
401 bool containsEnd = false;
402 QByteArray binary;
403 binary.resize( yencSize );
404 while ( pos < len ) {
405 int ch = s_rc.at( pos );
406 if ( ch < 0 ) {
407 ch += 256;
408 }
409 if ( ch == '\r' ) {
410 if ( lineLength != yencLine && totalSize != yencSize ) {
411 break;
412 }
413 pos++;
414 }
415 else if ( ch == '\n' ) {
416 lineStart = true;
417 lineLength = 0;
418 pos++;
419 } else {
420 if ( ch == '=' ) {
421 if ( pos + 1 < len ) {
422 ch = s_rc.at( pos + 1 );
423 if ( lineStart && ch == 'y' ) {
424 containsEnd = true;
425 break;
426 }
427 pos += 2;
428 ch -= 64+42;
429 if ( ch < 0 ) {
430 ch += 256;
431 }
432 if ( totalSize >= yencSize ) {
433 break;
434 }
435 binary[totalSize++] = ch;
436 lineLength++;
437 } else {
438 break;
439 }
440 } else {
441 ch -= 42;
442 if ( ch < 0 ) {
443 ch += 256;
444 }
445 if ( totalSize >= yencSize ) {
446 break;
447 }
448 binary[totalSize++] = ch;
449 lineLength++;
450 pos++;
451 }
452 lineStart = false;
453 }
454 }
455
456 if ( !containsEnd ) {
457 success = false;
458 break;
459 }
460 if ( totalSize != yencSize ) {
461 success = false;
462 break;
463 }
464
465 // pos now points to =yend; get end data
466 eolPos = s_rc.indexOf( '\n', pos );
467 if ( eolPos == -1 ) {
468 success = false;
469 break;
470 }
471 meta = s_rc.mid( pos, eolPos - pos );
472 if ( !yencMeta( meta, "size", &totalSize ) ) {
473 success = false;
474 break;
475 }
476 if ( totalSize != yencSize ) {
477 success = false;
478 break;
479 }
480
481 f_ilenames.append( fileName );
482 m_imeTypes.append( guessMimeType( fileName ) );
483 b_ins.append( binary );
484
485 //everything before "begin" is text
486 if ( beginPos > 0 ) {
487 t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
488 }
489 currentPos = eolPos + 1;
490
491 } else {
492 success = false;
493 }
494 }
495
496 // append trailing text part of the article
497 t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
498
499 return b_ins.count()>0;
500}
501
502} // namespace Parser
503
504} // namespace KMime
505