1 | /* -*- c++ -*- |
2 | kmime_header_parsing.cpp |
3 | |
4 | KMime, the KDE Internet mail/usenet news message library. |
5 | Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> |
6 | |
7 | This library is free software; you can redistribute it and/or |
8 | modify it under the terms of the GNU Library General Public |
9 | License as published by the Free Software Foundation; either |
10 | version 2 of the License, or (at your option) any later version. |
11 | |
12 | This library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | Library General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Library General Public License |
18 | along with this library; see the file COPYING.LIB. If not, write to |
19 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
20 | Boston, MA 02110-1301, USA. |
21 | */ |
22 | |
23 | #include "kmime_header_parsing.h" |
24 | |
25 | #include "kmime_codecs.h" |
26 | #include "kmime_headerfactory_p.h" |
27 | #include "kmime_headers.h" |
28 | #include "kmime_util.h" |
29 | #include "kmime_util_p.h" |
30 | #include "kmime_dateformatter.h" |
31 | #include "kmime_warning.h" |
32 | |
33 | #include <kglobal.h> |
34 | #include <kcharsets.h> |
35 | |
36 | #include <QtCore/QTextCodec> |
37 | #include <QtCore/QMap> |
38 | #include <QtCore/QStringList> |
39 | #include <QtCore/QUrl> |
40 | |
41 | #include <ctype.h> // for isdigit |
42 | #include <cassert> |
43 | |
44 | using namespace KMime; |
45 | using namespace KMime::Types; |
46 | |
47 | namespace KMime { |
48 | |
49 | namespace Types { |
50 | |
51 | // QUrl::fromAce is extremely expensive, so only use it when necessary. |
52 | // Fortunately, the presence of IDNA is readily detected with a substring match... |
53 | static inline QString QUrl_fromAce_wrapper( const QString & domain ) |
54 | { |
55 | if ( domain.contains( QLatin1String( "xn--" ) ) ) { |
56 | return QUrl::fromAce( domain.toLatin1() ); |
57 | } else { |
58 | return domain; |
59 | } |
60 | } |
61 | |
62 | static QString addr_spec_as_string( const AddrSpec & as, bool pretty ) |
63 | { |
64 | if ( as.isEmpty() ) { |
65 | return QString(); |
66 | } |
67 | |
68 | static QChar dotChar = QLatin1Char( '.' ); |
69 | static QChar backslashChar = QLatin1Char( '\\' ); |
70 | static QChar quoteChar = QLatin1Char( '"' ); |
71 | |
72 | bool needsQuotes = false; |
73 | QString result; |
74 | result.reserve( as.localPart.length() + as.domain.length() + 1 ); |
75 | for ( int i = 0 ; i < as.localPart.length() ; ++i ) { |
76 | const QChar ch = as.localPart.at( i ); |
77 | if ( ch == dotChar || isAText( ch.toLatin1() ) ) { |
78 | result += ch; |
79 | } else { |
80 | needsQuotes = true; |
81 | if ( ch == backslashChar || ch == quoteChar ) { |
82 | result += backslashChar; |
83 | } |
84 | result += ch; |
85 | } |
86 | } |
87 | const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ; |
88 | if ( needsQuotes ) { |
89 | result = quoteChar + result + quoteChar; |
90 | } |
91 | if ( dom.isEmpty() ) { |
92 | return result; |
93 | } else { |
94 | result += QLatin1Char( '@' ); |
95 | result += dom; |
96 | return result; |
97 | } |
98 | } |
99 | |
100 | QString AddrSpec::asString() const |
101 | { |
102 | return addr_spec_as_string( *this, false ); |
103 | } |
104 | |
105 | QString AddrSpec::asPrettyString() const |
106 | { |
107 | return addr_spec_as_string( *this, true ); |
108 | } |
109 | |
110 | bool AddrSpec::isEmpty() const |
111 | { |
112 | return localPart.isEmpty() && domain.isEmpty(); |
113 | } |
114 | |
115 | QByteArray Mailbox::address() const |
116 | { |
117 | QByteArray result; |
118 | const QString asString = addr_spec_as_string( mAddrSpec, false ); |
119 | if ( !asString.isEmpty() ) { |
120 | result = asString.toLatin1(); |
121 | } |
122 | return result; |
123 | //return mAddrSpec.asString().toLatin1(); |
124 | } |
125 | |
126 | AddrSpec Mailbox::addrSpec() const |
127 | { |
128 | return mAddrSpec; |
129 | } |
130 | |
131 | QString Mailbox::name() const |
132 | { |
133 | return mDisplayName; |
134 | } |
135 | |
136 | void Mailbox::setAddress( const AddrSpec &addr ) |
137 | { |
138 | mAddrSpec = addr; |
139 | } |
140 | |
141 | void Mailbox::setAddress( const QByteArray &addr ) |
142 | { |
143 | const char *cursor = addr.constData(); |
144 | if ( !HeaderParsing::parseAngleAddr( cursor, |
145 | cursor + addr.length(), mAddrSpec ) ) { |
146 | if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(), |
147 | mAddrSpec ) ) { |
148 | kWarning() << "Invalid address" ; |
149 | return; |
150 | } |
151 | } |
152 | } |
153 | |
154 | void Mailbox::setName( const QString &name ) |
155 | { |
156 | mDisplayName = removeBidiControlChars( name ); |
157 | } |
158 | |
159 | void Mailbox::setNameFrom7Bit( const QByteArray &name, |
160 | const QByteArray &defaultCharset ) |
161 | { |
162 | QByteArray cs; |
163 | setName( decodeRFC2047String( name, cs, defaultCharset, false ) ); |
164 | } |
165 | |
166 | bool Mailbox::hasAddress() const |
167 | { |
168 | return !mAddrSpec.isEmpty(); |
169 | } |
170 | |
171 | bool Mailbox::hasName() const |
172 | { |
173 | return !mDisplayName.isEmpty(); |
174 | } |
175 | |
176 | QString Mailbox::prettyAddress() const |
177 | { |
178 | return prettyAddress( QuoteNever ); |
179 | } |
180 | |
181 | QString Mailbox::prettyAddress( Quoting quoting ) const |
182 | { |
183 | if ( !hasName() ) { |
184 | return QLatin1String( address() ); |
185 | } |
186 | QString s = name(); |
187 | if ( quoting != QuoteNever ) { |
188 | addQuotes( s, quoting == QuoteAlways /*bool force*/ ); |
189 | } |
190 | |
191 | if ( hasAddress() ) { |
192 | s += QLatin1String( " <" ) + QLatin1String( address() ) + QLatin1Char( '>' ); |
193 | } |
194 | return s; |
195 | } |
196 | |
197 | void Mailbox::fromUnicodeString( const QString &s ) |
198 | { |
199 | from7BitString( encodeRFC2047Sentence( s, "utf-8" ) ); |
200 | } |
201 | |
202 | void Mailbox::from7BitString( const QByteArray &s ) |
203 | { |
204 | const char *cursor = s.constData(); |
205 | HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this ); |
206 | } |
207 | |
208 | QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const |
209 | { |
210 | if ( !hasName() ) { |
211 | return address(); |
212 | } |
213 | QByteArray rv; |
214 | if ( isUsAscii( name() ) ) { |
215 | QByteArray tmp = name().toLatin1(); |
216 | addQuotes( tmp, false ); |
217 | rv += tmp; |
218 | } else { |
219 | rv += encodeRFC2047String( name(), encCharset, true ); |
220 | } |
221 | if ( hasAddress() ) { |
222 | rv += " <" + address() + '>'; |
223 | } |
224 | return rv; |
225 | } |
226 | |
227 | } // namespace Types |
228 | |
229 | namespace HeaderParsing { |
230 | |
231 | // parse the encoded-word (scursor points to after the initial '=') |
232 | bool ( const char* &scursor, const char * const send, |
233 | QString &result, QByteArray &language, |
234 | QByteArray &usedCS, const QByteArray &defaultCS, |
235 | bool forceCS ) |
236 | { |
237 | // make sure the caller already did a bit of the work. |
238 | assert( *( scursor - 1 ) == '=' ); |
239 | |
240 | // |
241 | // STEP 1: |
242 | // scan for the charset/language portion of the encoded-word |
243 | // |
244 | |
245 | char ch = *scursor++; |
246 | |
247 | if ( ch != '?' ) { |
248 | // kDebug() << "first"; |
249 | //KMIME_WARN_PREMATURE_END_OF( EncodedWord ); |
250 | return false; |
251 | } |
252 | |
253 | // remember start of charset (ie. just after the initial "=?") and |
254 | // language (just after the first '*') fields: |
255 | const char * charsetStart = scursor; |
256 | const char * languageStart = 0; |
257 | |
258 | // find delimiting '?' (and the '*' separating charset and language |
259 | // tags, if any): |
260 | for ( ; scursor != send ; scursor++ ) { |
261 | if ( *scursor == '?' ) { |
262 | break; |
263 | } else if ( *scursor == '*' && languageStart == 0 ) { |
264 | languageStart = scursor + 1; |
265 | } |
266 | } |
267 | |
268 | // not found? can't be an encoded-word! |
269 | if ( scursor == send || *scursor != '?' ) { |
270 | // kDebug() << "second"; |
271 | KMIME_WARN_PREMATURE_END_OF( EncodedWord ); |
272 | return false; |
273 | } |
274 | |
275 | // extract the language information, if any (if languageStart is 0, |
276 | // language will be null, too): |
277 | QByteArray maybeLanguage( languageStart, scursor - languageStart ); |
278 | // extract charset information (keep in mind: the size given to the |
279 | // ctor is one off due to the \0 terminator): |
280 | QByteArray maybeCharset( charsetStart, |
281 | ( languageStart ? languageStart - 1 : scursor ) - charsetStart ); |
282 | |
283 | // |
284 | // STEP 2: |
285 | // scan for the encoding portion of the encoded-word |
286 | // |
287 | |
288 | // remember start of encoding (just _after_ the second '?'): |
289 | scursor++; |
290 | const char * encodingStart = scursor; |
291 | |
292 | // find next '?' (ending the encoding tag): |
293 | for ( ; scursor != send ; scursor++ ) { |
294 | if ( *scursor == '?' ) { |
295 | break; |
296 | } |
297 | } |
298 | |
299 | // not found? Can't be an encoded-word! |
300 | if ( scursor == send || *scursor != '?' ) { |
301 | // kDebug() << "third"; |
302 | KMIME_WARN_PREMATURE_END_OF( EncodedWord ); |
303 | return false; |
304 | } |
305 | |
306 | // extract the encoding information: |
307 | QByteArray maybeEncoding( encodingStart, scursor - encodingStart ); |
308 | |
309 | // kDebug() << "parseEncodedWord: found charset == \"" << maybeCharset |
310 | // << "\"; language == \"" << maybeLanguage |
311 | // << "\"; encoding == \"" << maybeEncoding << "\""; |
312 | |
313 | // |
314 | // STEP 3: |
315 | // scan for encoded-text portion of encoded-word |
316 | // |
317 | |
318 | // remember start of encoded-text (just after the third '?'): |
319 | scursor++; |
320 | const char * encodedTextStart = scursor; |
321 | |
322 | // find the '?=' sequence (ending the encoded-text): |
323 | for ( ; scursor != send ; scursor++ ) { |
324 | if ( *scursor == '?' ) { |
325 | if ( scursor + 1 != send ) { |
326 | if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore |
327 | KMIME_WARN << "Stray '?' in q-encoded word, ignoring this." ; |
328 | continue; |
329 | } |
330 | else { // yep, found a '?=' sequence |
331 | scursor += 2; |
332 | break; |
333 | } |
334 | } |
335 | else { // The '?' is the last char, but we need a '=' after it! |
336 | KMIME_WARN_PREMATURE_END_OF( EncodedWord ); |
337 | return false; |
338 | } |
339 | } |
340 | } |
341 | |
342 | if ( *( scursor - 2 ) != '?' || *( scursor - 1 ) != '=' || |
343 | scursor < encodedTextStart + 2 ) { |
344 | KMIME_WARN_PREMATURE_END_OF( EncodedWord ); |
345 | return false; |
346 | } |
347 | |
348 | // set end sentinel for encoded-text: |
349 | const char * const encodedTextEnd = scursor - 2; |
350 | |
351 | // |
352 | // STEP 4: |
353 | // setup decoders for the transfer encoding and the charset |
354 | // |
355 | |
356 | // try if there's a codec for the encoding found: |
357 | Codec * codec = Codec::codecForName( maybeEncoding ); |
358 | if ( !codec ) { |
359 | KMIME_WARN_UNKNOWN( Encoding, maybeEncoding ); |
360 | return false; |
361 | } |
362 | |
363 | // get an instance of a corresponding decoder: |
364 | Decoder * dec = codec->makeDecoder(); |
365 | assert( dec ); |
366 | |
367 | // try if there's a (text)codec for the charset found: |
368 | bool matchOK = false; |
369 | QTextCodec *textCodec = 0; |
370 | if ( forceCS || maybeCharset.isEmpty() ) { |
371 | textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK ); |
372 | usedCS = cachedCharset( defaultCS ); |
373 | } else { |
374 | textCodec = KGlobal::charsets()->codecForName( QLatin1String( maybeCharset ), matchOK ); |
375 | if ( !matchOK ) { //no suitable codec found => use default charset |
376 | textCodec = KGlobal::charsets()->codecForName( QLatin1String( defaultCS ), matchOK ); |
377 | usedCS = cachedCharset( defaultCS ); |
378 | } else { |
379 | usedCS = cachedCharset( maybeCharset ); |
380 | } |
381 | } |
382 | |
383 | if ( !matchOK || !textCodec ) { |
384 | KMIME_WARN_UNKNOWN( Charset, maybeCharset ); |
385 | delete dec; |
386 | return false; |
387 | }; |
388 | |
389 | // kDebug() << "mimeName(): \"" << textCodec->name() << "\""; |
390 | |
391 | // allocate a temporary buffer to store the 8bit text: |
392 | int encodedTextLength = encodedTextEnd - encodedTextStart; |
393 | QByteArray buffer; |
394 | buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) ); |
395 | char *bbegin = buffer.data(); |
396 | char *bend = bbegin + buffer.length(); |
397 | |
398 | // |
399 | // STEP 5: |
400 | // do the actual decoding |
401 | // |
402 | |
403 | if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) { |
404 | KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor(" |
405 | << encodedTextLength << ")\nresult may be truncated" ; |
406 | } |
407 | |
408 | result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() ); |
409 | |
410 | // kDebug() << "result now: \"" << result << "\""; |
411 | // cleanup: |
412 | delete dec; |
413 | language = maybeLanguage; |
414 | |
415 | return true; |
416 | } |
417 | |
// Advances 'scursor' past any run of space, tab, CR and LF characters.
// It stops at the first other character or at 'send' (one past the end of
// the input), whichever comes first. Comments are not skipped.
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
  while ( scursor != send &&
          ( *scursor == ' ' || *scursor == '\n' ||
            *scursor == '\t' || *scursor == '\r' ) ) {
    scursor++;
  }
}
425 | |
426 | bool ( const char * &scursor, const char * const send, |
427 | QString &result, bool allow8Bit ) |
428 | { |
429 | QPair<const char*, int> maybeResult; |
430 | |
431 | if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) { |
432 | result += QString::fromLatin1( maybeResult.first, maybeResult.second ); |
433 | return true; |
434 | } |
435 | |
436 | return false; |
437 | } |
438 | |
439 | bool ( const char * &scursor, const char * const send, |
440 | QPair<const char*,int> &result, bool allow8Bit ) |
441 | { |
442 | bool success = false; |
443 | const char *start = scursor; |
444 | |
445 | while ( scursor != send ) { |
446 | signed char ch = *scursor++; |
447 | if ( ch > 0 && isAText( ch ) ) { |
448 | // AText: OK |
449 | success = true; |
450 | } else if ( allow8Bit && ch < 0 ) { |
451 | // 8bit char: not OK, but be tolerant. |
452 | KMIME_WARN_8BIT( ch ); |
453 | success = true; |
454 | } else { |
455 | // CTL or special - marking the end of the atom: |
456 | // re-set sursor to point to the offending |
457 | // char and return: |
458 | scursor--; |
459 | break; |
460 | } |
461 | } |
462 | result.first = start; |
463 | result.second = scursor - start; |
464 | return success; |
465 | } |
466 | |
467 | // FIXME: Remove this and the other parseToken() method. add a new one where "result" is a |
468 | // QByteArray. |
469 | bool ( const char * &scursor, const char * const send, |
470 | QString &result, bool allow8Bit ) |
471 | { |
472 | QPair<const char*, int> maybeResult; |
473 | |
474 | if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) { |
475 | result += QString::fromLatin1( maybeResult.first, maybeResult.second ); |
476 | return true; |
477 | } |
478 | |
479 | return false; |
480 | } |
481 | |
482 | bool ( const char * &scursor, const char * const send, |
483 | QPair<const char*,int> &result, bool allow8Bit ) |
484 | { |
485 | bool success = false; |
486 | const char * start = scursor; |
487 | |
488 | while ( scursor != send ) { |
489 | signed char ch = *scursor++; |
490 | if ( ch > 0 && isTText( ch ) ) { |
491 | // TText: OK |
492 | success = true; |
493 | } else if ( allow8Bit && ch < 0 ) { |
494 | // 8bit char: not OK, but be tolerant. |
495 | KMIME_WARN_8BIT( ch ); |
496 | success = true; |
497 | } else { |
498 | // CTL or tspecial - marking the end of the atom: |
499 | // re-set sursor to point to the offending |
500 | // char and return: |
501 | scursor--; |
502 | break; |
503 | } |
504 | } |
505 | result.first = start; |
506 | result.second = scursor - start; |
507 | return success; |
508 | } |
509 | |
// Reads the next input character into the local variable 'ch' and advances
// 'scursor'. If the input is already exhausted (scursor == send), it emits
// the premature-end warning and makes the ENCLOSING FUNCTION return false.
// Expects 'scursor', 'send' and 'ch' to be in scope at the point of use.
#define READ_ch_OR_FAIL if ( scursor != send ) {        \
    ch = *scursor++;                                    \
  } else {                                              \
    KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
    return false;                                       \
  }
516 | |
517 | // known issues: |
518 | // |
519 | // - doesn't handle quoted CRLF |
520 | |
521 | // FIXME: Why is result a QString? This should be a QByteArray, since at this level, we don't |
522 | // know about encodings yet! |
523 | bool ( const char* &scursor, const char * const send, |
524 | QString &result, bool isCRLF, |
525 | const char openChar, const char closeChar ) |
526 | { |
527 | char ch; |
528 | // We are in a quoted-string or domain-literal or comment and the |
529 | // cursor points to the first char after the openChar. |
530 | // We will apply unfolding and quoted-pair removal. |
531 | // We return when we either encounter the end or unescaped openChar |
532 | // or closeChar. |
533 | |
534 | assert( *( scursor - 1 ) == openChar || *( scursor - 1 ) == closeChar ); |
535 | |
536 | while ( scursor != send ) { |
537 | ch = *scursor++; |
538 | |
539 | if ( ch == closeChar || ch == openChar ) { |
540 | // end of quoted-string or another opening char: |
541 | // let caller decide what to do. |
542 | return true; |
543 | } |
544 | |
545 | switch ( ch ) { |
546 | case '\\': // quoted-pair |
547 | // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5 |
548 | READ_ch_OR_FAIL; |
549 | KMIME_WARN_IF_8BIT( ch ); |
550 | result += QLatin1Char( ch ); |
551 | break; |
552 | case '\r': |
553 | // ### |
554 | // The case of lonely '\r' is easy to solve, as they're |
555 | // not part of Unix Line-ending conventions. |
556 | // But I see a problem if we are given Unix-native |
557 | // line-ending-mails, where we cannot determine anymore |
558 | // whether a given '\n' was part of a CRLF or was occurring |
559 | // on it's own. |
560 | READ_ch_OR_FAIL; |
561 | if ( ch != '\n' ) { |
562 | // CR on it's own... |
563 | KMIME_WARN_LONE( CR ); |
564 | result += QLatin1Char( '\r' ); |
565 | scursor--; // points to after the '\r' again |
566 | } else { |
567 | // CRLF encountered. |
568 | // lookahead: check for folding |
569 | READ_ch_OR_FAIL; |
570 | if ( ch == ' ' || ch == '\t' ) { |
571 | // correct folding; |
572 | // position cursor behind the CRLF WSP (unfolding) |
573 | // and add the WSP to the result |
574 | result += QLatin1Char( ch ); |
575 | } else { |
576 | // this is the "shouldn't happen"-case. There is a CRLF |
577 | // inside a quoted-string without it being part of FWS. |
578 | // We take it verbatim. |
579 | KMIME_WARN_NON_FOLDING( CRLF ); |
580 | result += QLatin1String( "\r\n" ); |
581 | // the cursor is decremented again, so's we need not |
582 | // duplicate the whole switch here. "ch" could've been |
583 | // everything (incl. openChar or closeChar). |
584 | scursor--; |
585 | } |
586 | } |
587 | break; |
588 | case '\n': |
589 | // Note: CRLF has been handled above already! |
590 | // ### LF needs special treatment, depending on whether isCRLF |
591 | // is true (we can be sure a lonely '\n' was meant this way) or |
592 | // false ('\n' alone could have meant LF or CRLF in the original |
593 | // message. This parser assumes CRLF iff the LF is followed by |
594 | // either WSP (folding) or NULL (premature end of quoted-string; |
595 | // Should be fixed, since NULL is allowed as per rfc822). |
596 | READ_ch_OR_FAIL; |
597 | if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) { |
598 | // folding |
599 | // correct folding |
600 | result += QLatin1Char( ch ); |
601 | } else { |
602 | // non-folding |
603 | KMIME_WARN_LONE( LF ); |
604 | result += QLatin1Char( '\n' ); |
605 | // pos is decremented, so's we need not duplicate the whole |
606 | // switch here. ch could've been everything (incl. <">, "\"). |
607 | scursor--; |
608 | } |
609 | break; |
610 | case '=': |
611 | { |
612 | // ### Work around broken clients that send encoded words in quoted-strings |
613 | // For example, older KMail versions. |
614 | if ( scursor == send ) { |
615 | break; |
616 | } |
617 | |
618 | const char *oldscursor = scursor; |
619 | QString tmp; |
620 | QByteArray lang, charset; |
621 | if ( *scursor++ == '?' ) { |
622 | --scursor; |
623 | if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { |
624 | result += tmp; |
625 | break; |
626 | } else { |
627 | scursor = oldscursor; |
628 | } |
629 | } else { |
630 | scursor = oldscursor; |
631 | } |
632 | // fall through |
633 | } |
634 | default: |
635 | KMIME_WARN_IF_8BIT( ch ); |
636 | result += QLatin1Char( ch ); |
637 | } |
638 | } |
639 | |
640 | return false; |
641 | } |
642 | |
643 | // known issues: |
644 | // |
645 | // - doesn't handle encoded-word inside comments. |
646 | |
647 | bool ( const char* &scursor, const char * const send, |
648 | QString &result, bool isCRLF, bool reallySave ) |
649 | { |
650 | int = 1; |
651 | const char *afterLastClosingParenPos = 0; |
652 | QString maybeCmnt; |
653 | const char *oldscursor = scursor; |
654 | |
655 | assert( *( scursor - 1 ) == '(' ); |
656 | |
657 | while ( commentNestingDepth ) { |
658 | QString cmntPart; |
659 | if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) { |
660 | assert( *( scursor - 1 ) == ')' || *( scursor - 1 ) == '(' ); |
661 | // see the kdoc for above function for the possible conditions |
662 | // we have to check: |
663 | switch ( *( scursor - 1 ) ) { |
664 | case ')': |
665 | if ( reallySave ) { |
666 | // add the chunk that's now surely inside the comment. |
667 | result += maybeCmnt; |
668 | result += cmntPart; |
669 | if ( commentNestingDepth > 1 ) { |
670 | // don't add the outermost ')'... |
671 | result += QLatin1Char( ')' ); |
672 | } |
673 | maybeCmnt.clear(); |
674 | } |
675 | afterLastClosingParenPos = scursor; |
676 | --commentNestingDepth; |
677 | break; |
678 | case '(': |
679 | if ( reallySave ) { |
680 | // don't add to "result" yet, because we might find that we |
681 | // are already outside the (broken) comment... |
682 | maybeCmnt += cmntPart; |
683 | maybeCmnt += QLatin1Char( '(' ); |
684 | } |
685 | ++commentNestingDepth; |
686 | break; |
687 | default: assert( 0 ); |
688 | } // switch |
689 | } else { |
690 | // !parseGenericQuotedString, ie. premature end |
691 | if ( afterLastClosingParenPos ) { |
692 | scursor = afterLastClosingParenPos; |
693 | } else { |
694 | scursor = oldscursor; |
695 | } |
696 | return false; |
697 | } |
698 | } // while |
699 | |
700 | return true; |
701 | } |
702 | |
703 | // known issues: none. |
704 | |
705 | bool ( const char* &scursor, const char * const send, |
706 | QString &result, bool isCRLF ) |
707 | { |
708 | enum { |
709 | None, Phrase, Atom, EncodedWord, QuotedString |
710 | } found = None; |
711 | |
712 | QString tmp; |
713 | QByteArray lang, charset; |
714 | const char *successfullyParsed = 0; |
715 | // only used by the encoded-word branch |
716 | const char *oldscursor; |
717 | // used to suppress whitespace between adjacent encoded-words |
718 | // (rfc2047, 6.2): |
719 | bool lastWasEncodedWord = false; |
720 | |
721 | while ( scursor != send ) { |
722 | char ch = *scursor++; |
723 | switch ( ch ) { |
724 | case '.': // broken, but allow for intorop's sake |
725 | if ( found == None ) { |
726 | --scursor; |
727 | return false; |
728 | } else { |
729 | if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) { |
730 | result += QLatin1String( ". " ); |
731 | } else { |
732 | result += QLatin1Char( '.' ); |
733 | } |
734 | successfullyParsed = scursor; |
735 | } |
736 | break; |
737 | case '"': // quoted-string |
738 | tmp.clear(); |
739 | if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { |
740 | successfullyParsed = scursor; |
741 | assert( *( scursor - 1 ) == '"' ); |
742 | switch ( found ) { |
743 | case None: |
744 | found = QuotedString; |
745 | break; |
746 | case Phrase: |
747 | case Atom: |
748 | case EncodedWord: |
749 | case QuotedString: |
750 | found = Phrase; |
751 | result += QLatin1Char( ' ' ); // rfc822, 3.4.4 |
752 | break; |
753 | default: |
754 | assert( 0 ); |
755 | } |
756 | lastWasEncodedWord = false; |
757 | result += tmp; |
758 | } else { |
759 | // premature end of quoted string. |
760 | // What to do? Return leading '"' as special? Return as quoted-string? |
761 | // We do the latter if we already found something, else signal failure. |
762 | if ( found == None ) { |
763 | return false; |
764 | } else { |
765 | result += QLatin1Char( ' ' ); // rfc822, 3.4.4 |
766 | result += tmp; |
767 | return true; |
768 | } |
769 | } |
770 | break; |
771 | case '(': // comment |
772 | // parse it, but ignore content: |
773 | tmp.clear(); |
774 | if ( parseComment( scursor, send, tmp, isCRLF, |
775 | false /*don't bother with the content*/ ) ) { |
776 | successfullyParsed = scursor; |
777 | lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2 |
778 | } else { |
779 | if ( found == None ) { |
780 | return false; |
781 | } else { |
782 | scursor = successfullyParsed; |
783 | return true; |
784 | } |
785 | } |
786 | break; |
787 | case '=': // encoded-word |
788 | tmp.clear(); |
789 | oldscursor = scursor; |
790 | lang.clear(); |
791 | charset.clear(); |
792 | if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) { |
793 | successfullyParsed = scursor; |
794 | switch ( found ) { |
795 | case None: |
796 | found = EncodedWord; |
797 | break; |
798 | case Phrase: |
799 | case EncodedWord: |
800 | case Atom: |
801 | case QuotedString: |
802 | if ( !lastWasEncodedWord ) { |
803 | result += QLatin1Char( ' ' ); // rfc822, 3.4.4 |
804 | } |
805 | found = Phrase; |
806 | break; |
807 | default: assert( 0 ); |
808 | } |
809 | lastWasEncodedWord = true; |
810 | result += tmp; |
811 | break; |
812 | } else { |
813 | // parse as atom: |
814 | scursor = oldscursor; |
815 | } |
816 | // fall though... |
817 | |
818 | default: //atom |
819 | tmp.clear(); |
820 | scursor--; |
821 | if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) { |
822 | successfullyParsed = scursor; |
823 | switch ( found ) { |
824 | case None: |
825 | found = Atom; |
826 | break; |
827 | case Phrase: |
828 | case Atom: |
829 | case EncodedWord: |
830 | case QuotedString: |
831 | found = Phrase; |
832 | result += QLatin1Char( ' ' ); // rfc822, 3.4.4 |
833 | break; |
834 | default: |
835 | assert( 0 ); |
836 | } |
837 | lastWasEncodedWord = false; |
838 | result += tmp; |
839 | } else { |
840 | if ( found == None ) { |
841 | return false; |
842 | } else { |
843 | scursor = successfullyParsed; |
844 | return true; |
845 | } |
846 | } |
847 | } |
848 | eatWhiteSpace( scursor, send ); |
849 | } |
850 | |
851 | return found != None; |
852 | } |
853 | |
854 | // FIXME: This should probably by QByteArray &result instead? |
855 | bool ( const char* &scursor, const char * const send, |
856 | QString &result, bool isCRLF ) |
857 | { |
858 | eatCFWS( scursor, send, isCRLF ); |
859 | |
860 | // always points to just after the last atom parsed: |
861 | const char *successfullyParsed; |
862 | |
863 | QString tmp; |
864 | if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { |
865 | return false; |
866 | } |
867 | result += tmp; |
868 | successfullyParsed = scursor; |
869 | |
870 | while ( scursor != send ) { |
871 | |
872 | // end of header or no '.' -> return |
873 | if ( scursor == send || *scursor != '.' ) { |
874 | return true; |
875 | } |
876 | scursor++; // eat '.' |
877 | |
878 | if ( scursor == send || !isAText( *scursor ) ) { |
879 | // end of header or no AText, but this time following a '.'!: |
880 | // reset cursor to just after last successfully parsed char and |
881 | // return: |
882 | scursor = successfullyParsed; |
883 | return true; |
884 | } |
885 | |
886 | // try to parse the next atom: |
887 | QString maybeAtom; |
888 | if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) { |
889 | scursor = successfullyParsed; |
890 | return true; |
891 | } |
892 | |
893 | result += QLatin1Char( '.' ); |
894 | result += maybeAtom; |
895 | successfullyParsed = scursor; |
896 | } |
897 | |
898 | scursor = successfullyParsed; |
899 | return true; |
900 | } |
901 | |
902 | void ( const char* &scursor, const char * const send, bool isCRLF ) |
903 | { |
904 | QString dummy; |
905 | |
906 | while ( scursor != send ) { |
907 | const char *oldscursor = scursor; |
908 | |
909 | char ch = *scursor++; |
910 | |
911 | switch ( ch ) { |
912 | case ' ': |
913 | case '\t': // whitespace |
914 | case '\r': |
915 | case '\n': // folding |
916 | continue; |
917 | |
918 | case '(': // comment |
919 | if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) { |
920 | continue; |
921 | } |
922 | scursor = oldscursor; |
923 | return; |
924 | |
925 | default: |
926 | scursor = oldscursor; |
927 | return; |
928 | } |
929 | } |
930 | } |
931 | |
932 | bool parseDomain( const char* &scursor, const char * const send, |
933 | QString &result, bool isCRLF ) |
934 | { |
935 | eatCFWS( scursor, send, isCRLF ); |
936 | if ( scursor == send ) { |
937 | return false; |
938 | } |
939 | |
940 | // domain := dot-atom / domain-literal / atom *("." atom) |
941 | // |
942 | // equivalent to: |
943 | // domain = dot-atom / domain-literal, |
944 | // since parseDotAtom does allow CFWS between atoms and dots |
945 | |
946 | if ( *scursor == '[' ) { |
947 | // domain-literal: |
948 | QString maybeDomainLiteral; |
949 | // eat '[': |
950 | scursor++; |
951 | while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral, |
952 | isCRLF, '[', ']' ) ) { |
953 | if ( scursor == send ) { |
954 | // end of header: check for closing ']': |
955 | if ( *( scursor - 1 ) == ']' ) { |
956 | // OK, last char was ']': |
957 | result = maybeDomainLiteral; |
958 | return true; |
959 | } else { |
960 | // not OK, domain-literal wasn't closed: |
961 | return false; |
962 | } |
963 | } |
964 | // we hit openChar in parseGenericQuotedString. |
965 | // include it in maybeDomainLiteral and keep on parsing: |
966 | if ( *( scursor - 1 ) == '[' ) { |
967 | maybeDomainLiteral += QLatin1Char( '[' ); |
968 | continue; |
969 | } |
970 | // OK, real end of domain-literal: |
971 | result = maybeDomainLiteral; |
972 | return true; |
973 | } |
974 | } else { |
975 | // dot-atom: |
976 | QString maybeDotAtom; |
977 | if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) { |
978 | result = maybeDotAtom; |
979 | // Domain may end with '.', if so preserve it' |
980 | if ( scursor != send && *scursor == '.' ) { |
981 | result += QLatin1Char( '.' ); |
982 | scursor++; |
983 | } |
984 | return true; |
985 | } |
986 | } |
987 | return false; |
988 | } |
989 | |
990 | bool ( const char* &scursor, const char* const send, |
991 | QStringList &result, bool isCRLF, bool save ) |
992 | { |
993 | while ( scursor != send ) { |
994 | eatCFWS( scursor, send, isCRLF ); |
995 | if ( scursor == send ) { |
996 | return false; |
997 | } |
998 | |
999 | // empty entry: |
1000 | if ( *scursor == ',' ) { |
1001 | scursor++; |
1002 | if ( save ) { |
1003 | result.append( QString() ); |
1004 | } |
1005 | continue; |
1006 | } |
1007 | |
1008 | // empty entry ending the list: |
1009 | if ( *scursor == ':' ) { |
1010 | scursor++; |
1011 | if ( save ) { |
1012 | result.append( QString() ); |
1013 | } |
1014 | return true; |
1015 | } |
1016 | |
1017 | // each non-empty entry must begin with '@': |
1018 | if ( *scursor != '@' ) { |
1019 | return false; |
1020 | } else { |
1021 | scursor++; |
1022 | } |
1023 | |
1024 | QString maybeDomain; |
1025 | if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { |
1026 | return false; |
1027 | } |
1028 | if ( save ) { |
1029 | result.append( maybeDomain ); |
1030 | } |
1031 | |
1032 | // eat the following (optional) comma: |
1033 | eatCFWS( scursor, send, isCRLF ); |
1034 | if ( scursor == send ) { |
1035 | return false; |
1036 | } |
1037 | if ( *scursor == ':' ) { |
1038 | scursor++; |
1039 | return true; |
1040 | } |
1041 | if ( *scursor == ',' ) { |
1042 | scursor++; |
1043 | } |
1044 | } |
1045 | |
1046 | return false; |
1047 | } |
1048 | |
1049 | bool ( const char* &scursor, const char * const send, |
1050 | AddrSpec &result, bool isCRLF ) |
1051 | { |
1052 | // |
1053 | // STEP 1: |
1054 | // local-part := dot-atom / quoted-string / word *("." word) |
1055 | // |
1056 | // this is equivalent to: |
1057 | // local-part := word *("." word) |
1058 | |
1059 | QString maybeLocalPart; |
1060 | QString tmp; |
1061 | |
1062 | while ( scursor != send ) { |
1063 | // first, eat any whitespace |
1064 | eatCFWS( scursor, send, isCRLF ); |
1065 | |
1066 | char ch = *scursor++; |
1067 | switch ( ch ) { |
1068 | case '.': // dot |
1069 | maybeLocalPart += QLatin1Char( '.' ); |
1070 | break; |
1071 | |
1072 | case '@': |
1073 | goto SAW_AT_SIGN; |
1074 | break; |
1075 | |
1076 | case '"': // quoted-string |
1077 | tmp.clear(); |
1078 | if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) { |
1079 | maybeLocalPart += tmp; |
1080 | } else { |
1081 | return false; |
1082 | } |
1083 | break; |
1084 | |
1085 | default: // atom |
1086 | scursor--; // re-set scursor to point to ch again |
1087 | tmp.clear(); |
1088 | if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) { |
1089 | maybeLocalPart += tmp; |
1090 | } else { |
1091 | return false; // parseAtom can only fail if the first char is non-atext. |
1092 | } |
1093 | break; |
1094 | } |
1095 | } |
1096 | |
1097 | return false; |
1098 | |
1099 | // |
1100 | // STEP 2: |
1101 | // domain |
1102 | // |
1103 | |
1104 | SAW_AT_SIGN: |
1105 | |
1106 | assert( *( scursor - 1 ) == '@' ); |
1107 | |
1108 | QString maybeDomain; |
1109 | if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) { |
1110 | return false; |
1111 | } |
1112 | |
1113 | result.localPart = maybeLocalPart; |
1114 | result.domain = maybeDomain; |
1115 | |
1116 | return true; |
1117 | } |
1118 | |
1119 | bool ( const char* &scursor, const char * const send, |
1120 | AddrSpec &result, bool isCRLF ) |
1121 | { |
1122 | // first, we need an opening angle bracket: |
1123 | eatCFWS( scursor, send, isCRLF ); |
1124 | if ( scursor == send || *scursor != '<' ) { |
1125 | return false; |
1126 | } |
1127 | scursor++; // eat '<' |
1128 | |
1129 | eatCFWS( scursor, send, isCRLF ); |
1130 | if ( scursor == send ) { |
1131 | return false; |
1132 | } |
1133 | |
1134 | if ( *scursor == '@' || *scursor == ',' ) { |
1135 | // obs-route: parse, but ignore: |
1136 | KMIME_WARN << "obsolete source route found! ignoring." ; |
1137 | QStringList dummy; |
1138 | if ( !parseObsRoute( scursor, send, dummy, |
1139 | isCRLF, false /* don't save */ ) ) { |
1140 | return false; |
1141 | } |
1142 | // angle-addr isn't complete until after the '>': |
1143 | if ( scursor == send ) { |
1144 | return false; |
1145 | } |
1146 | } |
1147 | |
1148 | // parse addr-spec: |
1149 | AddrSpec maybeAddrSpec; |
1150 | if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { |
1151 | return false; |
1152 | } |
1153 | |
1154 | eatCFWS( scursor, send, isCRLF ); |
1155 | if ( scursor == send || *scursor != '>' ) { |
1156 | return false; |
1157 | } |
1158 | scursor++; |
1159 | |
1160 | result = maybeAddrSpec; |
1161 | return true; |
1162 | |
1163 | } |
1164 | |
1165 | static QString ( const QString &input ) |
1166 | { |
1167 | const QLatin1Char quotes( '"' ); |
1168 | if ( input.startsWith( quotes ) && input.endsWith( quotes ) ) { |
1169 | QString stripped( input.mid( 1, input.size() - 2 ) ); |
1170 | return stripped; |
1171 | } else { |
1172 | return input; |
1173 | } |
1174 | } |
1175 | |
1176 | bool ( const char* &scursor, const char * const send, |
1177 | Mailbox &result, bool isCRLF ) |
1178 | { |
1179 | eatCFWS( scursor, send, isCRLF ); |
1180 | if ( scursor == send ) { |
1181 | return false; |
1182 | } |
1183 | |
1184 | AddrSpec maybeAddrSpec; |
1185 | QString maybeDisplayName; |
1186 | |
1187 | // first, try if it's a vanilla addr-spec: |
1188 | const char * oldscursor = scursor; |
1189 | if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) { |
1190 | result.setAddress( maybeAddrSpec ); |
1191 | // check for the obsolete form of display-name (as comment): |
1192 | eatWhiteSpace( scursor, send ); |
1193 | if ( scursor != send && *scursor == '(' ) { |
1194 | scursor++; |
1195 | if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { |
1196 | return false; |
1197 | } |
1198 | } |
1199 | result.setName( stripQuotes( maybeDisplayName ) ); |
1200 | return true; |
1201 | } |
1202 | scursor = oldscursor; |
1203 | |
1204 | // second, see if there's a display-name: |
1205 | if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { |
1206 | // failed: reset cursor, note absent display-name |
1207 | maybeDisplayName.clear(); |
1208 | scursor = oldscursor; |
1209 | } else { |
1210 | // succeeded: eat CFWS |
1211 | eatCFWS( scursor, send, isCRLF ); |
1212 | if ( scursor == send ) { |
1213 | return false; |
1214 | } |
1215 | } |
1216 | |
1217 | // third, parse the angle-addr: |
1218 | if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) { |
1219 | return false; |
1220 | } |
1221 | |
1222 | if ( maybeDisplayName.isNull() ) { |
1223 | // check for the obsolete form of display-name (as comment): |
1224 | eatWhiteSpace( scursor, send ); |
1225 | if ( scursor != send && *scursor == '(' ) { |
1226 | scursor++; |
1227 | if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) { |
1228 | return false; |
1229 | } |
1230 | } |
1231 | } |
1232 | |
1233 | result.setName( stripQuotes( maybeDisplayName ) ); |
1234 | result.setAddress( maybeAddrSpec ); |
1235 | return true; |
1236 | } |
1237 | |
1238 | bool ( const char* &scursor, const char * const send, |
1239 | Address &result, bool isCRLF ) |
1240 | { |
1241 | // group := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS] |
1242 | // |
1243 | // equivalent to: |
1244 | // group := display-name ":" [ obs-mbox-list ] ";" |
1245 | |
1246 | eatCFWS( scursor, send, isCRLF ); |
1247 | if ( scursor == send ) { |
1248 | return false; |
1249 | } |
1250 | |
1251 | // get display-name: |
1252 | QString maybeDisplayName; |
1253 | if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) { |
1254 | return false; |
1255 | } |
1256 | |
1257 | // get ":": |
1258 | eatCFWS( scursor, send, isCRLF ); |
1259 | if ( scursor == send || *scursor != ':' ) { |
1260 | return false; |
1261 | } |
1262 | |
1263 | // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that |
1264 | // automatically calls removeBidiControlChars |
1265 | result.displayName = removeBidiControlChars( maybeDisplayName ); |
1266 | |
1267 | // get obs-mbox-list (may contain empty entries): |
1268 | scursor++; |
1269 | while ( scursor != send ) { |
1270 | eatCFWS( scursor, send, isCRLF ); |
1271 | if ( scursor == send ) { |
1272 | return false; |
1273 | } |
1274 | |
1275 | // empty entry: |
1276 | if ( *scursor == ',' ) { |
1277 | scursor++; |
1278 | continue; |
1279 | } |
1280 | |
1281 | // empty entry ending the list: |
1282 | if ( *scursor == ';' ) { |
1283 | scursor++; |
1284 | return true; |
1285 | } |
1286 | |
1287 | Mailbox maybeMailbox; |
1288 | if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { |
1289 | return false; |
1290 | } |
1291 | result.mailboxList.append( maybeMailbox ); |
1292 | |
1293 | eatCFWS( scursor, send, isCRLF ); |
1294 | // premature end: |
1295 | if ( scursor == send ) { |
1296 | return false; |
1297 | } |
1298 | // regular end of the list: |
1299 | if ( *scursor == ';' ) { |
1300 | scursor++; |
1301 | return true; |
1302 | } |
1303 | // eat regular list entry separator: |
1304 | if ( *scursor == ',' ) { |
1305 | scursor++; |
1306 | } |
1307 | } |
1308 | return false; |
1309 | } |
1310 | |
1311 | bool ( const char* &scursor, const char * const send, |
1312 | Address &result, bool isCRLF ) |
1313 | { |
1314 | // address := mailbox / group |
1315 | |
1316 | eatCFWS( scursor, send, isCRLF ); |
1317 | if ( scursor == send ) { |
1318 | return false; |
1319 | } |
1320 | |
1321 | // first try if it's a single mailbox: |
1322 | Mailbox maybeMailbox; |
1323 | const char * oldscursor = scursor; |
1324 | if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) { |
1325 | // yes, it is: |
1326 | result.displayName.clear(); |
1327 | result.mailboxList.append( maybeMailbox ); |
1328 | return true; |
1329 | } |
1330 | scursor = oldscursor; |
1331 | |
1332 | Address maybeAddress; |
1333 | |
1334 | // no, it's not a single mailbox. Try if it's a group: |
1335 | if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) { |
1336 | return false; |
1337 | } |
1338 | |
1339 | result = maybeAddress; |
1340 | return true; |
1341 | } |
1342 | |
1343 | bool ( const char* &scursor, const char * const send, |
1344 | AddressList &result, bool isCRLF ) |
1345 | { |
1346 | while ( scursor != send ) { |
1347 | eatCFWS( scursor, send, isCRLF ); |
1348 | // end of header: this is OK. |
1349 | if ( scursor == send ) { |
1350 | return true; |
1351 | } |
1352 | // empty entry: ignore: |
1353 | if ( *scursor == ',' ) { |
1354 | scursor++; |
1355 | continue; |
1356 | } |
1357 | // broken clients might use ';' as list delimiter, accept that as well |
1358 | if ( *scursor == ';' ) { |
1359 | scursor++; |
1360 | continue; |
1361 | } |
1362 | |
1363 | // parse one entry |
1364 | Address maybeAddress; |
1365 | if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) { |
1366 | return false; |
1367 | } |
1368 | result.append( maybeAddress ); |
1369 | |
1370 | eatCFWS( scursor, send, isCRLF ); |
1371 | // end of header: this is OK. |
1372 | if ( scursor == send ) { |
1373 | return true; |
1374 | } |
1375 | // comma separating entries: eat it. |
1376 | if ( *scursor == ',' ) { |
1377 | scursor++; |
1378 | } |
1379 | } |
1380 | return true; |
1381 | } |
1382 | |
1383 | static QString = QString::fromLatin1( "*0*" , 1 ); |
1384 | static QString = QString::fromLatin1( "*0*" , 2 ); |
1385 | //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 ); |
1386 | |
1387 | // FIXME: Get rid of the very ugly "QStringOrQPair" thing. At this level, we are supposed to work |
1388 | // on byte arrays, not strings! The result parameter should be a simple |
1389 | // QPair<QByteArray,QByteArray>, which is the attribute name and the value. |
1390 | bool ( const char* &scursor, const char * const send, |
1391 | QPair<QString,QStringOrQPair> &result, bool isCRLF ) |
1392 | { |
1393 | // parameter = regular-parameter / extended-parameter |
1394 | // regular-parameter = regular-parameter-name "=" value |
1395 | // extended-parameter = |
1396 | // value = token / quoted-string |
1397 | // |
1398 | // note that rfc2231 handling is out of the scope of this function. |
1399 | // Therefore we return the attribute as QString and the value as |
1400 | // (start,length) tupel if we see that the value is encoded |
1401 | // (trailing asterisk), for parseParameterList to decode... |
1402 | |
1403 | eatCFWS( scursor, send, isCRLF ); |
1404 | if ( scursor == send ) { |
1405 | return false; |
1406 | } |
1407 | |
1408 | // |
1409 | // parse the parameter name: |
1410 | // |
1411 | // FIXME: maybeAttribute should be a QByteArray |
1412 | QString maybeAttribute; |
1413 | if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) { |
1414 | return false; |
1415 | } |
1416 | |
1417 | eatCFWS( scursor, send, isCRLF ); |
1418 | // premature end: not OK (haven't seen '=' yet). |
1419 | if ( scursor == send || *scursor != '=' ) { |
1420 | return false; |
1421 | } |
1422 | scursor++; // eat '=' |
1423 | |
1424 | eatCFWS( scursor, send, isCRLF ); |
1425 | if ( scursor == send ) { |
1426 | // don't choke on attribute=, meaning the value was omitted: |
1427 | if ( maybeAttribute.endsWith( asterisk ) ) { |
1428 | KMIME_WARN << "attribute ends with \"*\", but value is empty!" |
1429 | "Chopping away \"*\"." ; |
1430 | maybeAttribute.truncate( maybeAttribute.length() - 1 ); |
1431 | } |
1432 | result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); |
1433 | return true; |
1434 | } |
1435 | |
1436 | const char * oldscursor = scursor; |
1437 | |
1438 | // |
1439 | // parse the parameter value: |
1440 | // |
1441 | QStringOrQPair maybeValue; |
1442 | if ( *scursor == '"' ) { |
1443 | // value is a quoted-string: |
1444 | scursor++; |
1445 | if ( maybeAttribute.endsWith( asterisk ) ) { |
1446 | // attributes ending with "*" designate extended-parameters, |
1447 | // which cannot have quoted-strings as values. So we remove the |
1448 | // trailing "*" to not confuse upper layers. |
1449 | KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!" |
1450 | "Chopping away \"*\"." ; |
1451 | maybeAttribute.truncate( maybeAttribute.length() - 1 ); |
1452 | } |
1453 | |
1454 | if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) { |
1455 | scursor = oldscursor; |
1456 | result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); |
1457 | return false; // this case needs further processing by upper layers!! |
1458 | } |
1459 | } else { |
1460 | // value is a token: |
1461 | if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) { |
1462 | scursor = oldscursor; |
1463 | result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() ); |
1464 | return false; // this case needs further processing by upper layers!! |
1465 | } |
1466 | } |
1467 | |
1468 | result = qMakePair( maybeAttribute.toLower(), maybeValue ); |
1469 | return true; |
1470 | } |
1471 | |
// FIXME: Get rid of QStringOrQPair: simply use a QMap<QByteArray, QByteArray> for "result"
// instead!
1474 | bool ( const char* &scursor, const char * const send, |
1475 | QMap<QString,QStringOrQPair> &result, |
1476 | bool isCRLF ) |
1477 | { |
1478 | // we use parseParameter() consecutively to obtain a map of raw |
1479 | // attributes to raw values. "Raw" here means that we don't do |
1480 | // rfc2231 decoding and concatenation. This is left to |
1481 | // parseParameterList(), which will call this function. |
1482 | // |
1483 | // The main reason for making this chunk of code a separate |
1484 | // (private) method is that we can deal with broken parameters |
1485 | // _here_ and leave the rfc2231 handling solely to |
1486 | // parseParameterList(), which will still be enough work. |
1487 | |
1488 | while ( scursor != send ) { |
1489 | eatCFWS( scursor, send, isCRLF ); |
1490 | // empty entry ending the list: OK. |
1491 | if ( scursor == send ) { |
1492 | return true; |
1493 | } |
1494 | // empty list entry: ignore. |
1495 | if ( *scursor == ';' ) { |
1496 | scursor++; |
1497 | continue; |
1498 | } |
1499 | |
1500 | QPair<QString, QStringOrQPair> maybeParameter; |
1501 | if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) { |
1502 | // we need to do a bit of work if the attribute is not |
1503 | // NULL. These are the cases marked with "needs further |
1504 | // processing" in parseParameter(). Specifically, parsing of the |
1505 | // token or the quoted-string, which should represent the value, |
1506 | // failed. We take the easy way out and simply search for the |
1507 | // next ';' to start parsing again. (Another option would be to |
1508 | // take the text between '=' and ';' as value) |
1509 | if ( maybeParameter.first.isNull() ) { |
1510 | return false; |
1511 | } |
1512 | while ( scursor != send ) { |
1513 | if ( *scursor++ == ';' ) { |
1514 | goto IS_SEMICOLON; |
1515 | } |
1516 | } |
1517 | // scursor == send case: end of list. |
1518 | return true; |
1519 | IS_SEMICOLON: |
1520 | // *scursor == ';' case: parse next entry. |
1521 | continue; |
1522 | } |
1523 | // successful parsing brings us here: |
1524 | result.insert( maybeParameter.first, maybeParameter.second ); |
1525 | |
1526 | eatCFWS( scursor, send, isCRLF ); |
1527 | // end of header: ends list. |
1528 | if ( scursor == send ) { |
1529 | return true; |
1530 | } |
1531 | // regular separator: eat it. |
1532 | if ( *scursor == ';' ) { |
1533 | scursor++; |
1534 | } |
1535 | } |
1536 | return true; |
1537 | } |
1538 | |
1539 | static void ( Codec* &rfc2231Codec, |
1540 | QTextCodec* &textcodec, |
1541 | bool isContinuation, QString &value, |
1542 | QPair<const char*,int> &source, QByteArray& charset ) |
1543 | { |
1544 | // |
1545 | // parse the raw value into (charset,language,text): |
1546 | // |
1547 | |
1548 | const char * decBegin = source.first; |
1549 | const char * decCursor = decBegin; |
1550 | const char * decEnd = decCursor + source.second; |
1551 | |
1552 | if ( !isContinuation ) { |
1553 | // find the first single quote |
1554 | while ( decCursor != decEnd ) { |
1555 | if ( *decCursor == '\'' ) { |
1556 | break; |
1557 | } else { |
1558 | decCursor++; |
1559 | } |
1560 | } |
1561 | |
1562 | if ( decCursor == decEnd ) { |
1563 | // there wasn't a single single quote at all! |
1564 | // take the whole value to be in latin-1: |
1565 | KMIME_WARN << "No charset in extended-initial-value." |
1566 | "Assuming \"iso-8859-1\"." ; |
1567 | value += QString::fromLatin1( decBegin, source.second ); |
1568 | return; |
1569 | } |
1570 | |
1571 | charset = QByteArray( decBegin, decCursor - decBegin ); |
1572 | |
1573 | const char * oldDecCursor = ++decCursor; |
1574 | // find the second single quote (we ignore the language tag): |
1575 | while ( decCursor != decEnd ) { |
1576 | if ( *decCursor == '\'' ) { |
1577 | break; |
1578 | } else { |
1579 | decCursor++; |
1580 | } |
1581 | } |
1582 | if ( decCursor == decEnd ) { |
1583 | KMIME_WARN << "No language in extended-initial-value." |
1584 | "Trying to recover." ; |
1585 | decCursor = oldDecCursor; |
1586 | } else { |
1587 | decCursor++; |
1588 | } |
1589 | |
1590 | // decCursor now points to the start of the |
1591 | // "extended-other-values": |
1592 | |
1593 | // |
1594 | // get the decoders: |
1595 | // |
1596 | |
1597 | bool matchOK = false; |
1598 | textcodec = KGlobal::charsets()->codecForName( QLatin1String( charset ), matchOK ); |
1599 | if ( !matchOK ) { |
1600 | textcodec = 0; |
1601 | KMIME_WARN_UNKNOWN( Charset, charset ); |
1602 | } |
1603 | } |
1604 | |
1605 | if ( !rfc2231Codec ) { |
1606 | rfc2231Codec = Codec::codecForName( "x-kmime-rfc2231" ); |
1607 | assert( rfc2231Codec ); |
1608 | } |
1609 | |
1610 | if ( !textcodec ) { |
1611 | value += QString::fromLatin1( decCursor, decEnd - decCursor ); |
1612 | return; |
1613 | } |
1614 | |
1615 | Decoder * dec = rfc2231Codec->makeDecoder(); |
1616 | assert( dec ); |
1617 | |
1618 | // |
1619 | // do the decoding: |
1620 | // |
1621 | |
1622 | QByteArray buffer; |
1623 | buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) ); |
1624 | QByteArray::Iterator bit = buffer.begin(); |
1625 | QByteArray::ConstIterator bend = buffer.end(); |
1626 | |
1627 | if ( !dec->decode( decCursor, decEnd, bit, bend ) ) { |
1628 | KMIME_WARN << rfc2231Codec->name() |
1629 | << "codec lies about its maxDecodedSizeFor()" << endl |
1630 | << "result may be truncated" ; |
1631 | } |
1632 | |
1633 | value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() ); |
1634 | |
1635 | // kDebug() << "value now: \"" << value << "\""; |
1636 | // cleanup: |
1637 | delete dec; |
1638 | } |
1639 | |
1640 | // known issues: |
1641 | // - permutes rfc2231 continuations when the total number of parts |
1642 | // exceeds 10 (other-sections then becomes *xy, ie. two digits) |
1643 | |
1644 | bool ( const char* &scursor, |
1645 | const char * const send, |
1646 | QMap<QString,QString> &result, |
1647 | QByteArray& charset, bool isCRLF ) |
1648 | { |
1649 | // parse the list into raw attribute-value pairs: |
1650 | QMap<QString, QStringOrQPair> rawParameterList; |
1651 | if ( !parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) { |
1652 | return false; |
1653 | } |
1654 | |
1655 | if ( rawParameterList.isEmpty() ) { |
1656 | return true; |
1657 | } |
1658 | |
1659 | // decode rfc 2231 continuations and alternate charset encoding: |
1660 | |
1661 | // NOTE: this code assumes that what QMapIterator delivers is sorted |
1662 | // by the key! |
1663 | |
1664 | Codec * rfc2231Codec = 0; |
1665 | QTextCodec * textcodec = 0; |
1666 | QString attribute; |
1667 | QString value; |
1668 | enum Mode { |
1669 | NoMode = 0x0, Continued = 0x1, Encoded = 0x2 |
1670 | }; |
1671 | |
1672 | enum EncodingMode { |
1673 | NoEncoding, |
1674 | RFC2047, |
1675 | RFC2231 |
1676 | }; |
1677 | |
1678 | QMap<QString, QStringOrQPair>::Iterator it, end = rawParameterList.end(); |
1679 | |
1680 | for ( it = rawParameterList.begin() ; it != end ; ++it ) { |
1681 | if ( attribute.isNull() || !it.key().startsWith( attribute ) ) { |
1682 | // |
1683 | // new attribute: |
1684 | // |
1685 | |
1686 | // store the last attribute/value pair in the result map now: |
1687 | if ( !attribute.isNull() ) { |
1688 | result.insert( attribute, value ); |
1689 | } |
1690 | // and extract the information from the new raw attribute: |
1691 | value.clear(); |
1692 | attribute = it.key(); |
1693 | int mode = NoMode; |
1694 | EncodingMode encodingMode = NoEncoding; |
1695 | |
1696 | // is the value rfc2331-encoded? |
1697 | if ( attribute.endsWith( asterisk ) ) { |
1698 | attribute.truncate( attribute.length() - 1 ); |
1699 | mode |= Encoded; |
1700 | encodingMode = RFC2231; |
1701 | } |
1702 | // is the value rfc2047-encoded? |
1703 | if ( !( *it ).qstring.isNull() && ( *it ).qstring.contains( QLatin1String( "=?" ) ) ) { |
1704 | mode |= Encoded; |
1705 | encodingMode = RFC2047; |
1706 | } |
1707 | // is the value continued? |
1708 | if ( attribute.endsWith( asteriskZero ) ) { |
1709 | attribute.truncate( attribute.length() - 2 ); |
1710 | mode |= Continued; |
1711 | } |
1712 | // |
1713 | // decode if necessary: |
1714 | // |
1715 | if ( mode & Encoded ) { |
1716 | if ( encodingMode == RFC2231 ) { |
1717 | decodeRFC2231Value( rfc2231Codec, textcodec, |
1718 | false, /* isn't continuation */ |
1719 | value, ( *it ).qpair, charset ); |
1720 | } |
1721 | else if ( encodingMode == RFC2047 ) { |
1722 | value += decodeRFC2047String( ( *it ).qstring.toLatin1(), charset ); |
1723 | } |
1724 | } else { |
1725 | // not encoded. |
1726 | if ( ( *it ).qpair.first ) { |
1727 | value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second ); |
1728 | } else { |
1729 | value += ( *it ).qstring; |
1730 | } |
1731 | } |
1732 | |
1733 | // |
1734 | // shortcut-processing when the value isn't encoded: |
1735 | // |
1736 | |
1737 | if ( !( mode & Continued ) ) { |
1738 | // save result already: |
1739 | result.insert( attribute, value ); |
1740 | // force begin of a new attribute: |
1741 | attribute.clear(); |
1742 | } |
1743 | } else { // it.key().startsWith( attribute ) |
1744 | // |
1745 | // continuation |
1746 | // |
1747 | |
1748 | // ignore the section and trust QMap to have sorted the keys: |
1749 | if ( it.key().endsWith( asterisk ) ) { |
1750 | // encoded |
1751 | decodeRFC2231Value( rfc2231Codec, textcodec, |
1752 | true, /* is continuation */ |
1753 | value, ( *it ).qpair, charset ); |
1754 | } else { |
1755 | // not encoded |
1756 | if ( ( *it ).qpair.first ) { |
1757 | value += QString::fromLatin1( ( *it ).qpair.first, ( *it ).qpair.second ); |
1758 | } else { |
1759 | value += ( *it ).qstring; |
1760 | } |
1761 | } |
1762 | } |
1763 | } |
1764 | |
1765 | // write last attr/value pair: |
1766 | if ( !attribute.isNull() ) { |
1767 | result.insert( attribute, value ); |
1768 | } |
1769 | |
1770 | return true; |
1771 | } |
1772 | |
1773 | |
1774 | bool ( const char* &scursor, const char * const send, |
1775 | QMap<QString,QString> &result, bool isCRLF ) |
1776 | { |
1777 | QByteArray charset; |
1778 | return parseParameterListWithCharset( scursor, send, result, charset, isCRLF ); |
1779 | } |
1780 | |
// Three-letter day names matched by parseDayName(), starting with Sunday.
static const char * const stdDayNames[] = {
  "Sun",
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat"
};
// Number of entries in stdDayNames.
static const int stdDayNamesLen = sizeof( stdDayNames ) / sizeof( stdDayNames[0] );
1785 | |
1786 | static bool ( const char* &scursor, const char * const send ) |
1787 | { |
1788 | // check bounds: |
1789 | if ( send - scursor < 3 ) { |
1790 | return false; |
1791 | } |
1792 | |
1793 | for ( int i = 0 ; i < stdDayNamesLen ; ++i ) { |
1794 | if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) { |
1795 | scursor += 3; |
1796 | // kDebug() << "found" << stdDayNames[i]; |
1797 | return true; |
1798 | } |
1799 | } |
1800 | |
1801 | return false; |
1802 | } |
1803 | |
// Three-letter month names matched by parseMonthName(); the array index
// is the zero-based month number.
static const char * const stdMonthNames[] = {
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec"
};
// Number of entries in stdMonthNames.
static const int stdMonthNamesLen = sizeof( stdMonthNames ) / sizeof( stdMonthNames[0] );
1810 | |
1811 | static bool ( const char* &scursor, const char * const send, |
1812 | int &result ) |
1813 | { |
1814 | // check bounds: |
1815 | if ( send - scursor < 3 ) { |
1816 | return false; |
1817 | } |
1818 | |
1819 | for ( result = 0 ; result < stdMonthNamesLen ; ++result ) { |
1820 | if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) { |
1821 | scursor += 3; |
1822 | return true; |
1823 | } |
1824 | } |
1825 | |
1826 | // not found: |
1827 | return false; |
1828 | } |
1829 | |
// Table of alphabetic time zone names understood by
// parseAlphaNumericTimeZone(), each with its offset in seconds east of
// GMT. Lookups take the first entry whose name matches, so every name
// must appear only once.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 },   // was a second, wrong "MST" entry
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
// Number of entries in timeZones.
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
1880 | |
1881 | static bool ( const char* &scursor, |
1882 | const char * const send, |
1883 | long int &secsEastOfGMT, |
1884 | bool &timeZoneKnown ) |
1885 | { |
1886 | // allow the timezone to be wrapped in quotes; bug 260761 |
1887 | if ( *scursor == '"' ) { |
1888 | scursor++; |
1889 | |
1890 | if ( scursor == send ) { |
1891 | return false; |
1892 | } |
1893 | } |
1894 | |
1895 | QPair<const char*, int> maybeTimeZone( 0, 0 ); |
1896 | if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) { |
1897 | return false; |
1898 | } |
1899 | for ( int i = 0 ; i < timeZonesLen ; ++i ) { |
1900 | if ( qstrnicmp( timeZones[i].tzName, |
1901 | maybeTimeZone.first, maybeTimeZone.second ) == 0 ) { |
1902 | scursor += maybeTimeZone.second; |
1903 | secsEastOfGMT = timeZones[i].secsEastOfGMT; |
1904 | timeZoneKnown = true; |
1905 | |
1906 | if ( *scursor == '"' ) { |
1907 | scursor++; |
1908 | } |
1909 | |
1910 | return true; |
1911 | } |
1912 | } |
1913 | |
1914 | // don't choke just because we don't happen to know the time zone |
1915 | KMIME_WARN_UNKNOWN( time zone, |
1916 | QByteArray( maybeTimeZone.first, maybeTimeZone.second ) ); |
1917 | secsEastOfGMT = 0; |
1918 | timeZoneKnown = false; |
1919 | return true; |
1920 | } |
1921 | |
1922 | // parse a number and return the number of digits parsed: |
// Parses a run of decimal digits.
//
// scursor  in/out: read position, advanced past the digits
// send     end of the input
// result   value of the digits read; 0 if there were none
//
// Returns the number of digits consumed (0 if the input does not start
// with a digit).
//
// NOTE: there is no overflow protection; a run of digits too long for an
// int overflows result.
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  result = 0;
  int digits = 0;
  // isdigit() is only defined for values representable as unsigned char
  // (or EOF), so bytes >= 0x80 must not be passed as a (negative) char.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
1933 | |
1934 | static bool ( const char* &scursor, const char * const send, |
1935 | int &hour, int &min, int &sec, bool isCRLF=false ) |
1936 | { |
1937 | // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ] |
1938 | |
1939 | // |
1940 | // 2DIGIT representing "hour": |
1941 | // |
1942 | if ( !parseDigits( scursor, send, hour ) ) { |
1943 | return false; |
1944 | } |
1945 | |
1946 | eatCFWS( scursor, send, isCRLF ); |
1947 | if ( scursor == send || *scursor != ':' ) { |
1948 | return false; |
1949 | } |
1950 | scursor++; // eat ':' |
1951 | |
1952 | eatCFWS( scursor, send, isCRLF ); |
1953 | if ( scursor == send ) { |
1954 | return false; |
1955 | } |
1956 | |
1957 | // |
1958 | // 2DIGIT representing "minute": |
1959 | // |
1960 | if ( !parseDigits( scursor, send, min ) ) { |
1961 | return false; |
1962 | } |
1963 | |
1964 | eatCFWS( scursor, send, isCRLF ); |
1965 | if ( scursor == send ) { |
1966 | return true; // seconds are optional |
1967 | } |
1968 | |
1969 | // |
1970 | // let's see if we have a 2DIGIT representing "second": |
1971 | // |
1972 | if ( *scursor == ':' ) { |
1973 | // yepp, there are seconds: |
1974 | scursor++; // eat ':' |
1975 | eatCFWS( scursor, send, isCRLF ); |
1976 | if ( scursor == send ) { |
1977 | return false; |
1978 | } |
1979 | |
1980 | if ( !parseDigits( scursor, send, sec ) ) { |
1981 | return false; |
1982 | } |
1983 | } else { |
1984 | sec = 0; |
1985 | } |
1986 | |
1987 | return true; |
1988 | } |
1989 | |
1990 | bool ( const char* &scursor, const char * send, |
1991 | int &hour, int &min, int &sec, long int &secsEastOfGMT, |
1992 | bool &timeZoneKnown, bool isCRLF ) |
1993 | { |
1994 | // time := time-of-day CFWS ( zone / obs-zone ) |
1995 | // |
1996 | // obs-zone := "UT" / "GMT" / |
1997 | // "EST" / "EDT" / ; -0500 / -0400 |
1998 | // "CST" / "CDT" / ; -0600 / -0500 |
1999 | // "MST" / "MDT" / ; -0700 / -0600 |
2000 | // "PST" / "PDT" / ; -0800 / -0700 |
2001 | // "A"-"I" / "a"-"i" / |
2002 | // "K"-"Z" / "k"-"z" |
2003 | |
2004 | eatCFWS( scursor, send, isCRLF ); |
2005 | if ( scursor == send ) { |
2006 | return false; |
2007 | } |
2008 | |
2009 | if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) { |
2010 | return false; |
2011 | } |
2012 | |
2013 | eatCFWS( scursor, send, isCRLF ); |
2014 | // there might be no timezone but a year following |
2015 | if ( ( scursor == send ) || isdigit( *scursor ) ) { |
2016 | timeZoneKnown = false; |
2017 | secsEastOfGMT = 0; |
2018 | return true; // allow missing timezone |
2019 | } |
2020 | |
2021 | timeZoneKnown = true; |
2022 | if ( *scursor == '+' || *scursor == '-' ) { |
2023 | // remember and eat '-'/'+': |
2024 | const char sign = *scursor++; |
2025 | // numerical timezone: |
2026 | int maybeTimeZone; |
2027 | if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) { |
2028 | return false; |
2029 | } |
2030 | secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 ); |
2031 | if ( sign == '-' ) { |
2032 | secsEastOfGMT *= -1; |
2033 | if ( secsEastOfGMT == 0 ) { |
2034 | timeZoneKnown = false; // -0000 means indetermined tz |
2035 | } |
2036 | } |
2037 | } else { |
2038 | // maybe alphanumeric timezone: |
2039 | if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) { |
2040 | return false; |
2041 | } |
2042 | } |
2043 | return true; |
2044 | } |
2045 | |
2046 | bool ( const char* &scursor, const char * const send, |
2047 | KDateTime &result, bool isCRLF ) |
2048 | { |
2049 | // Parsing date-time; strict mode: |
2050 | // |
2051 | // date-time := [ [CFWS] day-name [CFWS] "," ] ; wday |
2052 | // (expanded) [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date |
2053 | // time |
2054 | // |
2055 | // day-name := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" |
2056 | // month-name := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / |
2057 | // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" |
2058 | |
2059 | result = KDateTime(); |
2060 | QDateTime maybeDateTime; |
2061 | |
2062 | eatCFWS( scursor, send, isCRLF ); |
2063 | if ( scursor == send ) { |
2064 | return false; |
2065 | } |
2066 | |
2067 | // |
2068 | // let's see if there's a day-of-week: |
2069 | // |
2070 | if ( parseDayName( scursor, send ) ) { |
2071 | eatCFWS( scursor, send, isCRLF ); |
2072 | if ( scursor == send ) { |
2073 | return false; |
2074 | } |
2075 | // day-name should be followed by ',' but we treat it as optional: |
2076 | if ( *scursor == ',' ) { |
2077 | scursor++; // eat ',' |
2078 | eatCFWS( scursor, send, isCRLF ); |
2079 | } |
2080 | } |
2081 | |
2082 | int maybeMonth = -1; |
2083 | bool asctimeFormat = false; |
2084 | |
2085 | // ANSI-C asctime() format is: Wed Jun 30 21:49:08 1993 |
2086 | if ( !isdigit( *scursor ) && parseMonthName( scursor, send, maybeMonth ) ) { |
2087 | asctimeFormat = true; |
2088 | eatCFWS( scursor, send, isCRLF ); |
2089 | } |
2090 | |
2091 | // |
2092 | // 1*2DIGIT representing "day" (of month): |
2093 | // |
2094 | int maybeDay; |
2095 | if ( !parseDigits( scursor, send, maybeDay ) ) { |
2096 | return false; |
2097 | } |
2098 | |
2099 | eatCFWS( scursor, send, isCRLF ); |
2100 | if ( scursor == send ) { |
2101 | return false; |
2102 | } |
2103 | |
2104 | // ignore ","; bug 54098 |
2105 | if ( *scursor == ',' ) { |
2106 | scursor++; |
2107 | } |
2108 | |
2109 | // |
2110 | // month-name: |
2111 | // |
2112 | if ( !asctimeFormat && !parseMonthName( scursor, send, maybeMonth ) ) { |
2113 | return false; |
2114 | } |
2115 | if ( scursor == send ) { |
2116 | return false; |
2117 | } |
2118 | assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 ); |
2119 | ++maybeMonth; // 0-11 -> 1-12 |
2120 | |
2121 | eatCFWS( scursor, send, isCRLF ); |
2122 | if ( scursor == send ) { |
2123 | return false; |
2124 | } |
2125 | |
2126 | // check for "year HH:MM:SS" or only "HH:MM:SS" (or "H:MM:SS") |
2127 | bool timeAfterYear = true; |
2128 | if ( ( send - scursor > 3 ) && ( ( scursor[1] == ':' ) || ( scursor[2] == ':' ) ) ) { |
2129 | timeAfterYear = false; // first read time, then year |
2130 | } |
2131 | |
2132 | // |
2133 | // 2*DIGIT representing "year": |
2134 | // |
2135 | int maybeYear = 0; |
2136 | |
2137 | if ( timeAfterYear && !parseDigits( scursor, send, maybeYear ) ) { |
2138 | return false; |
2139 | } |
2140 | |
2141 | eatCFWS( scursor, send, isCRLF ); |
2142 | if ( scursor == send ) { |
2143 | return false; |
2144 | } |
2145 | |
2146 | // |
2147 | // time |
2148 | // |
2149 | int maybeHour, maybeMinute, maybeSecond; |
2150 | long int secsEastOfGMT; |
2151 | bool timeZoneKnown = true; |
2152 | |
2153 | if ( !parseTime( scursor, send, |
2154 | maybeHour, maybeMinute, maybeSecond, |
2155 | secsEastOfGMT, timeZoneKnown, isCRLF ) ) { |
2156 | return false; |
2157 | } |
2158 | |
2159 | // in asctime() the year follows the time |
2160 | if ( !timeAfterYear ) { |
2161 | eatCFWS( scursor, send, isCRLF ); |
2162 | if ( scursor == send ) { |
2163 | return false; |
2164 | } |
2165 | |
2166 | if ( !parseDigits( scursor, send, maybeYear ) ) { |
2167 | return false; |
2168 | } |
2169 | } |
2170 | |
2171 | // RFC 2822 4.3 processing: |
2172 | if ( maybeYear < 50 ) { |
2173 | maybeYear += 2000; |
2174 | } else if ( maybeYear < 1000 ) { |
2175 | maybeYear += 1900; |
2176 | } |
2177 | // else keep as is |
2178 | if ( maybeYear < 1900 ) { |
2179 | return false; // rfc2822, 3.3 |
2180 | } |
2181 | |
2182 | maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) ); |
2183 | maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) ); |
2184 | |
2185 | if ( !maybeDateTime.isValid() ) { |
2186 | return false; |
2187 | } |
2188 | |
2189 | result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) ); |
2190 | if ( !result.isValid() ) { |
2191 | return false; |
2192 | } |
2193 | return true; |
2194 | } |
2195 | |
2196 | Headers::Base *( QByteArray &head ) |
2197 | { |
2198 | int endOfFieldBody = 0; |
2199 | bool folded = false; |
2200 | Headers::Base * = 0; |
2201 | |
2202 | int startOfFieldBody = head.indexOf( ':' ); |
2203 | const int = startOfFieldBody; |
2204 | |
2205 | if ( startOfFieldBody > -1 ) { //there is another header |
2206 | startOfFieldBody++; //skip the ':' |
2207 | if ( head[startOfFieldBody] == ' ' ) { // skip the space after the ':', if there |
2208 | startOfFieldBody++; |
2209 | } |
2210 | endOfFieldBody = findHeaderLineEnd( head, startOfFieldBody, &folded ); |
2211 | |
2212 | QByteArray rawType = head.left( endOfFieldHeader ); |
2213 | QByteArray rawFieldBody = head.mid( startOfFieldBody, endOfFieldBody - startOfFieldBody ); |
2214 | if ( folded ) { |
2215 | rawFieldBody = unfoldHeader( rawFieldBody ); |
2216 | } |
2217 | // We might get an invalid mail without a field name, don't crash on that. |
2218 | if ( !rawType.isEmpty() ) { |
2219 | header = HeaderFactory::self()->createHeader( rawType ); |
2220 | } |
2221 | if ( !header ) { |
2222 | //kWarning() << "Returning Generic header of type" << rawType; |
2223 | header = new Headers::Generic( rawType.constData() ); |
2224 | } |
2225 | header->from7BitString( rawFieldBody ); |
2226 | |
2227 | head.remove( 0, endOfFieldBody + 1 ); |
2228 | } else { |
2229 | head.clear(); |
2230 | } |
2231 | |
2232 | return header; |
2233 | } |
2234 | |
2235 | void extractHeaderAndBody( const QByteArray &content, QByteArray &, QByteArray &body ) |
2236 | { |
2237 | header.clear(); |
2238 | body.clear(); |
2239 | |
2240 | // empty header |
2241 | if ( content.startsWith( '\n' ) ) { |
2242 | body = content.right( content.length() - 1 ); |
2243 | return; |
2244 | } |
2245 | |
2246 | int pos = content.indexOf( "\n\n" , 0 ); |
2247 | if ( pos > -1 ) { |
2248 | header = content.left( ++pos ); //header *must* end with "\n" !! |
2249 | body = content.mid( pos + 1, content.length() - pos - 1 ); |
2250 | } else { |
2251 | header = content; |
2252 | } |
2253 | } |
2254 | |
2255 | Headers::Base::List ( const QByteArray &head ) |
2256 | { |
2257 | Headers::Base::List ret; |
2258 | Headers::Base *h; |
2259 | |
2260 | QByteArray copy = head; |
2261 | while ( ( h = extractFirstHeader( copy ) ) ) { |
2262 | ret << h; |
2263 | } |
2264 | |
2265 | return ret; |
2266 | } |
2267 | |
2268 | } // namespace HeaderParsing |
2269 | |
2270 | } // namespace KMime |
2271 | |