1 | /* |
2 | kmime_parsers.cpp |
3 | |
4 | KMime, the KDE Internet mail/usenet news message library. |
5 | Copyright (c) 2001 the KMime authors. |
6 | See file AUTHORS for details |
7 | |
8 | This library is free software; you can redistribute it and/or |
9 | modify it under the terms of the GNU Library General Public |
10 | License as published by the Free Software Foundation; either |
11 | version 2 of the License, or (at your option) any later version. |
12 | |
13 | This library is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | Library General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU Library General Public License |
19 | along with this library; see the file COPYING.LIB. If not, write to |
20 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
21 | Boston, MA 02110-1301, USA. |
22 | */ |
23 | #include "kmime_parsers.h" |
24 | |
25 | #include <QtCore/QRegExp> |
26 | #include <QtCore/QByteArray> |
27 | |
28 | using namespace KMime::Parser; |
29 | |
30 | namespace KMime { |
31 | namespace Parser { |
32 | |
33 | MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary ) |
34 | { |
35 | s_rc=src; |
36 | b_oundary=boundary; |
37 | } |
38 | |
39 | bool MultiPart::parse() |
40 | { |
41 | QByteArray b = "--" + b_oundary, part; |
42 | int pos1=0, pos2=0, blen=b.length(); |
43 | |
44 | p_arts.clear(); |
45 | |
46 | //find the first valid boundary |
47 | while ( 1 ) { |
48 | if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 || |
49 | s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all |
50 | break; |
51 | } |
52 | pos1 += blen; //boundary found but not valid => skip it; |
53 | } |
54 | |
55 | if ( pos1 > -1 ) { |
56 | pos1 += blen; |
57 | if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) { |
58 | // the only valid boundary is the end-boundary |
59 | // this message is *really* broken |
60 | pos1 = -1; //we give up |
61 | } else if ( ( pos1 - blen ) > 1 ) { //preamble present |
62 | p_reamble = s_rc.left( pos1 - blen - 1 ); |
63 | } |
64 | } |
65 | |
66 | while ( pos1 > -1 && pos2 > -1 ) { |
67 | |
68 | //skip the rest of the line for the first boundary - the message-part starts here |
69 | if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { |
70 | //now search the next linebreak |
71 | //now find the next valid boundary |
72 | pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary |
73 | while ( 1 ) { |
74 | if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 || |
75 | s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found |
76 | break; |
77 | } |
78 | pos2 += blen; //boundary is invalid => skip it; |
79 | } |
80 | |
81 | if ( pos2 == -1 ) { // no more boundaries found |
82 | part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string |
83 | p_arts.append( part ); |
84 | pos1 = -1; |
85 | pos2 = -1; //break; |
86 | } else { |
87 | part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1) |
88 | p_arts.append( part ); |
89 | pos2 += blen; //pos2 points now to the first character after the boundary |
90 | if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary |
91 | pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary |
92 | |
93 | if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line |
94 | //everything after the end-boundary is considered as the epilouge |
95 | e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 ); |
96 | } |
97 | pos1 = -1; |
98 | pos2 = -1; //break |
99 | } else { |
100 | pos1 = pos2; //the search continues ... |
101 | } |
102 | } |
103 | } |
104 | } |
105 | |
106 | return !p_arts.isEmpty(); |
107 | } |
108 | |
109 | //============================================================================= |
110 | |
111 | NonMimeParser::NonMimeParser( const QByteArray &src ) : |
112 | s_rc( src ), p_artNr( -1 ), t_otalNr( -1 ) |
113 | { |
114 | } |
115 | |
116 | /** |
117 | * try to guess the mimetype from the file-extension |
118 | */ |
119 | QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName ) |
120 | { |
121 | QByteArray tmp, mimeType; |
122 | int pos; |
123 | |
124 | if ( !fileName.isEmpty() ) { |
125 | pos = fileName.lastIndexOf( '.' ); |
126 | if ( pos++ != -1 ) { |
127 | tmp = fileName.mid( pos, fileName.length() - pos ).toUpper(); |
128 | if ( tmp == "JPG" || tmp == "JPEG" ) { |
129 | mimeType = "image/jpeg" ; |
130 | } else if ( tmp == "GIF" ) { |
131 | mimeType = "image/gif" ; |
132 | } else if ( tmp == "PNG" ) { |
133 | mimeType = "image/png" ; |
134 | } else if ( tmp == "TIFF" || tmp == "TIF" ) { |
135 | mimeType = "image/tiff" ; |
136 | } else if ( tmp == "XPM" ) { |
137 | mimeType = "image/x-xpixmap" ; |
138 | } else if ( tmp == "XBM" ) { |
139 | mimeType = "image/x-xbitmap" ; |
140 | } else if ( tmp == "BMP" ) { |
141 | mimeType = "image/bmp" ; |
142 | } else if ( tmp == "TXT" || |
143 | tmp == "ASC" || |
144 | tmp == "H" || |
145 | tmp == "C" || |
146 | tmp == "CC" || |
147 | tmp == "CPP" ) { |
148 | mimeType = "text/plain" ; |
149 | } else if ( tmp == "HTML" || tmp == "HTM" ) { |
150 | mimeType = "text/html" ; |
151 | } else { |
152 | mimeType = "application/octet-stream" ; |
153 | } |
154 | } else { |
155 | mimeType = "application/octet-stream" ; |
156 | } |
157 | } else { |
158 | mimeType = "application/octet-stream" ; |
159 | } |
160 | |
161 | return mimeType; |
162 | } |
163 | |
164 | //============================================================================== |
165 | |
166 | UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) : |
167 | NonMimeParser( src ), s_ubject( subject ) |
168 | {} |
169 | |
170 | bool UUEncoded::parse() |
171 | { |
172 | int currentPos=0; |
173 | bool success=true, firstIteration=true; |
174 | |
175 | while ( success ) { |
176 | int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0; |
177 | bool containsBegin=false, containsEnd=false; |
178 | QByteArray tmp, fileName; |
179 | |
180 | if ( ( beginPos = QString::fromLatin1( s_rc ).indexOf( QRegExp( QLatin1String( "begin [0-9][0-9][0-9]" ) ), |
181 | currentPos ) ) > -1 && |
182 | ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) { |
183 | containsBegin = true; |
184 | uuStart = s_rc.indexOf( '\n', beginPos ); |
185 | if ( uuStart == -1 ) {//no more line breaks found, we give up |
186 | success = false; |
187 | break; |
188 | } else { |
189 | uuStart++; //points now at the beginning of the next line |
190 | } |
191 | } else { |
192 | beginPos=currentPos; |
193 | } |
194 | |
195 | if ( ( endPos = s_rc.indexOf( "\nend" , ( uuStart > 0 ) ? uuStart - 1 : 0 ) ) == -1 ) { |
196 | endPos = s_rc.length(); //no end found |
197 | } else { |
198 | containsEnd = true; |
199 | } |
200 | |
201 | if ( ( containsBegin && containsEnd ) || firstIteration ) { |
202 | |
203 | //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos); |
204 | //all lines in a uuencoded text start with 'M' |
205 | for ( int idx=uuStart; idx<endPos; idx++ ) { |
206 | if ( s_rc[idx] == '\n' ) { |
207 | lineCount++; |
208 | if ( idx + 1 < endPos && s_rc[idx + 1] == 'M' ) { |
209 | idx++; |
210 | MCount++; |
211 | } |
212 | } |
213 | } |
214 | |
215 | //printf("lineCount=%d , MCount=%d\n", lineCount, MCount); |
216 | if ( MCount == 0 || ( lineCount - MCount ) > 10 || |
217 | ( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) { |
218 | // harder check for split-articles |
219 | success = false; |
220 | break; //too many "non-M-Lines" found, we give up |
221 | } |
222 | |
223 | if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) { |
224 | // message may be split up => parse subject |
225 | QRegExp rx( QLatin1String( "[0-9]+/[0-9]+" ) ); |
226 | pos = rx.indexIn( QLatin1String( s_ubject ), 0 ); |
227 | len = rx.matchedLength(); |
228 | if ( pos != -1 ) { |
229 | tmp = s_ubject.mid( pos, len ); |
230 | pos = tmp.indexOf( '/' ); |
231 | p_artNr = tmp.left( pos ).toInt(); |
232 | t_otalNr = tmp.right( tmp.length() - pos - 1 ).toInt(); |
233 | } else { |
234 | success = false; |
235 | break; //no "part-numbers" found in the subject, we give up |
236 | } |
237 | } |
238 | |
239 | //everything before "begin" is text |
240 | if ( beginPos > 0 ) { |
241 | t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) ); |
242 | } |
243 | |
244 | if ( containsBegin ) { |
245 | //everything between "begin ### " and the next LF is considered as the filename |
246 | fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 ); |
247 | } else { |
248 | fileName = "" ; |
249 | } |
250 | f_ilenames.append( fileName ); |
251 | //everything beetween "begin" and "end" is uuencoded |
252 | b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) ); |
253 | m_imeTypes.append( guessMimeType( fileName ) ); |
254 | firstIteration = false; |
255 | |
256 | int next = s_rc.indexOf( '\n', endPos + 1 ); |
257 | if ( next == -1 ) { //no more line breaks found, we give up |
258 | success = false; |
259 | break; |
260 | } else { |
261 | next++; //points now at the beginning of the next line |
262 | } |
263 | currentPos = next; |
264 | |
265 | } else { |
266 | success = false; |
267 | } |
268 | } |
269 | |
270 | // append trailing text part of the article |
271 | t_ext.append( s_rc.right( s_rc.length() - currentPos ) ); |
272 | |
273 | return ( ( b_ins.count() > 0 ) || isPartial() ); |
274 | } |
275 | |
276 | //============================================================================== |
277 | |
278 | YENCEncoded::YENCEncoded( const QByteArray &src ) : |
279 | NonMimeParser( src ) |
280 | { |
281 | } |
282 | |
283 | bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value ) |
284 | { |
285 | bool found = false; |
286 | QByteArray sought=name + '='; |
287 | |
288 | int iPos = src.indexOf( sought ); |
289 | if ( iPos > -1 ) { |
290 | int pos1 = src.indexOf( ' ', iPos ); |
291 | int pos2 = src.indexOf( '\r', iPos ); |
292 | int pos3 = src.indexOf( '\t', iPos ); |
293 | int pos4 = src.indexOf( '\n', iPos ); |
294 | if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) { |
295 | pos1 = pos2; |
296 | } |
297 | if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) { |
298 | pos1 = pos3; |
299 | } |
300 | if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) { |
301 | pos1 = pos4; |
302 | } |
303 | iPos=src.lastIndexOf( '=', pos1 ) + 1; |
304 | if ( iPos < pos1 ) { |
305 | char c = src.at( iPos ); |
306 | if ( c>='0' && c<='9' ) { |
307 | found = true; |
308 | *value = src.mid( iPos, pos1 - iPos ).toInt(); |
309 | } |
310 | } |
311 | } |
312 | return found; |
313 | } |
314 | |
315 | bool YENCEncoded::parse() |
316 | { |
317 | int currentPos=0; |
318 | bool success=true; |
319 | |
320 | while ( success ) { |
321 | int beginPos=currentPos, yencStart=currentPos; |
322 | bool containsPart=false; |
323 | QByteArray fileName, mimeType; |
324 | |
325 | if ( ( beginPos = s_rc.indexOf( "=ybegin " , currentPos ) ) > -1 && |
326 | ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) { |
327 | yencStart = s_rc.indexOf( '\n', beginPos ); |
328 | if ( yencStart == -1 ) { // no more line breaks found, give up |
329 | success = false; |
330 | break; |
331 | } else { |
332 | yencStart++; |
333 | if ( s_rc.indexOf( "=ypart" , yencStart ) == yencStart ) { |
334 | containsPart = true; |
335 | yencStart = s_rc.indexOf( '\n', yencStart ); |
336 | if ( yencStart == -1 ) { |
337 | success = false; |
338 | break; |
339 | } |
340 | yencStart++; |
341 | } |
342 | } |
343 | // Try to identify yenc meta data |
344 | |
345 | // Filenames can contain any embedded chars until end of line |
346 | QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos ); |
347 | int namePos = meta.indexOf( "name=" ); |
348 | if ( namePos == -1 ) { |
349 | success = false; |
350 | break; |
351 | } |
352 | int eolPos = meta.indexOf( '\r', namePos ); |
353 | if ( eolPos == -1 ) { |
354 | eolPos = meta.indexOf( '\n', namePos ); |
355 | } |
356 | if ( eolPos == -1 ) { |
357 | success = false; |
358 | break; |
359 | } |
360 | fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) ); |
361 | |
362 | // Other metadata is integer |
363 | int yencLine; |
364 | if ( !yencMeta( meta, "line" , ¥cLine ) ) { |
365 | success = false; |
366 | break; |
367 | } |
368 | int yencSize; |
369 | if ( !yencMeta( meta, "size" , ¥cSize ) ) { |
370 | success = false; |
371 | break; |
372 | } |
373 | |
374 | int partBegin, partEnd; |
375 | if ( containsPart ) { |
376 | if ( !yencMeta( meta, "part" , &p_artNr ) ) { |
377 | success = false; |
378 | break; |
379 | } |
380 | if ( !yencMeta( meta, "begin" , &partBegin ) || |
381 | !yencMeta( meta, "end" , &partEnd ) ) { |
382 | success = false; |
383 | break; |
384 | } |
385 | if ( !yencMeta( meta, "total" , &t_otalNr ) ) { |
386 | t_otalNr = p_artNr + 1; |
387 | } |
388 | if ( yencSize == partEnd - partBegin + 1 ) { |
389 | t_otalNr = 1; |
390 | } else { |
391 | yencSize = partEnd - partBegin + 1; |
392 | } |
393 | } |
394 | |
395 | // We have a valid yenc header; now we extract the binary data |
396 | int totalSize = 0; |
397 | int pos = yencStart; |
398 | int len = s_rc.length(); |
399 | bool lineStart = true; |
400 | int lineLength = 0; |
401 | bool containsEnd = false; |
402 | QByteArray binary; |
403 | binary.resize( yencSize ); |
404 | while ( pos < len ) { |
405 | int ch = s_rc.at( pos ); |
406 | if ( ch < 0 ) { |
407 | ch += 256; |
408 | } |
409 | if ( ch == '\r' ) { |
410 | if ( lineLength != yencLine && totalSize != yencSize ) { |
411 | break; |
412 | } |
413 | pos++; |
414 | } |
415 | else if ( ch == '\n' ) { |
416 | lineStart = true; |
417 | lineLength = 0; |
418 | pos++; |
419 | } else { |
420 | if ( ch == '=' ) { |
421 | if ( pos + 1 < len ) { |
422 | ch = s_rc.at( pos + 1 ); |
423 | if ( lineStart && ch == 'y' ) { |
424 | containsEnd = true; |
425 | break; |
426 | } |
427 | pos += 2; |
428 | ch -= 64+42; |
429 | if ( ch < 0 ) { |
430 | ch += 256; |
431 | } |
432 | if ( totalSize >= yencSize ) { |
433 | break; |
434 | } |
435 | binary[totalSize++] = ch; |
436 | lineLength++; |
437 | } else { |
438 | break; |
439 | } |
440 | } else { |
441 | ch -= 42; |
442 | if ( ch < 0 ) { |
443 | ch += 256; |
444 | } |
445 | if ( totalSize >= yencSize ) { |
446 | break; |
447 | } |
448 | binary[totalSize++] = ch; |
449 | lineLength++; |
450 | pos++; |
451 | } |
452 | lineStart = false; |
453 | } |
454 | } |
455 | |
456 | if ( !containsEnd ) { |
457 | success = false; |
458 | break; |
459 | } |
460 | if ( totalSize != yencSize ) { |
461 | success = false; |
462 | break; |
463 | } |
464 | |
465 | // pos now points to =yend; get end data |
466 | eolPos = s_rc.indexOf( '\n', pos ); |
467 | if ( eolPos == -1 ) { |
468 | success = false; |
469 | break; |
470 | } |
471 | meta = s_rc.mid( pos, eolPos - pos ); |
472 | if ( !yencMeta( meta, "size" , &totalSize ) ) { |
473 | success = false; |
474 | break; |
475 | } |
476 | if ( totalSize != yencSize ) { |
477 | success = false; |
478 | break; |
479 | } |
480 | |
481 | f_ilenames.append( fileName ); |
482 | m_imeTypes.append( guessMimeType( fileName ) ); |
483 | b_ins.append( binary ); |
484 | |
485 | //everything before "begin" is text |
486 | if ( beginPos > 0 ) { |
487 | t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) ); |
488 | } |
489 | currentPos = eolPos + 1; |
490 | |
491 | } else { |
492 | success = false; |
493 | } |
494 | } |
495 | |
496 | // append trailing text part of the article |
497 | t_ext.append( s_rc.right( s_rc.length() - currentPos ) ); |
498 | |
499 | return b_ins.count()>0; |
500 | } |
501 | |
502 | } // namespace Parser |
503 | |
504 | } // namespace KMime |
505 | |