1/* -*- c++ -*-
2 kmime_util.h
3
4 KMime, the KDE Internet mail/usenet news message library.
5 Copyright (c) 2001 the KMime authors.
6 See file AUTHORS for details
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Library General Public
10 License as published by the Free Software Foundation; either
11 version 2 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Library General Public License for more details.
17
18 You should have received a copy of the GNU Library General Public License
19 along with this library; see the file COPYING.LIB. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
22*/
23#ifndef __KMIME_UTIL_H__
24#define __KMIME_UTIL_H__
25
26#include <QtCore/QString>
27#include "kmime_export.h"
28#include "kmime_headers.h"
29#include "kmime_content.h"
30
31namespace KMime {
32
33class Message;
34
35/**
36 Consult the charset cache. Only used for reducing mem usage by
37 keeping strings in a common repository.
  @param name the charset name to look up in the cache.
39*/
40KMIME_EXPORT extern QByteArray cachedCharset( const QByteArray &name );
41
42/**
43 Consult the language cache. Only used for reducing mem usage by
44 keeping strings in a common repository.
  @param name the language name to look up in the cache.
46*/
47KMIME_EXPORT extern QByteArray cachedLanguage( const QByteArray &name );
48
49/**
50 Checks whether @p s contains any non-us-ascii characters.
  @param s the string to check.
52*/
53KMIME_EXPORT extern bool isUsAscii( const QString &s );
54
55/**
56 Returns a user-visible string for a contentEncoding, for example
57 "quoted-printable" for CEquPr.
58 @param enc the contentEncoding to return string for
  @since 4.4
60 TODO should they be i18n'ed?
61*/
62KMIME_EXPORT extern QString nameForEncoding( KMime::Headers::contentEncoding enc );
63
64/**
65 Returns a list of encodings that can correctly encode the @p data.
66 @param data the data to check encodings for
  @since 4.4
68*/
69KMIME_EXPORT QList<KMime::Headers::contentEncoding> encodingsForData(
70 const QByteArray &data );
71//@cond PRIVATE
72extern const uchar specialsMap[16];
73extern const uchar tSpecialsMap[16];
74extern const uchar aTextMap[16];
75extern const uchar tTextMap[16];
76extern const uchar eTextMap[16];
77
// Tests ch against a 128-bit character-class bitmap. Byte ch/8 of map holds the
// flag for ch, and within that byte the most significant bit belongs to the
// lowest character code. Codes of 128 and above are never members.
inline bool isOfSet( const uchar map[16], unsigned char ch )
{
  if ( ch >= 128 ) {
    return false; // the table only covers 7-bit character codes
  }
  const int bitMask = 0x80 >> ( ch % 8 );
  return ( map[ ch / 8 ] & bitMask ) != 0;
}
82inline bool isSpecial( char ch )
83{
84 return isOfSet( specialsMap, ch );
85}
86inline bool isTSpecial( char ch )
87{
88 return isOfSet( tSpecialsMap, ch );
89}
90inline bool isAText( char ch )
91{
92 return isOfSet( aTextMap, ch );
93}
94inline bool isTText( char ch )
95{
96 return isOfSet( tTextMap, ch );
97}
98inline bool isEText( char ch )
99{
100 return isOfSet( eTextMap, ch );
101}
102//@endcond
103
104/**
105 * Set the fallback charset to use when decoding RFC2047-encoded headers.
106 * If decoding according to the RFC fails, then the fallback encoding is
107 * used instead.
108 *
109 * @param fallbackCharEnc Name of fallback character encoding to use.
110 *
111 * @since 4.5
112 */
113KMIME_EXPORT extern void setFallbackCharEncoding( const QString& fallbackCharEnc );
114
115/**
116 * Retrieve the set fallback charset if there is one set.
117 *
118 * @return The name of the fallback encoding, if one was set, otherwise
119 * an empty QString.
120 *
121 * @since 4.5
122 */
123KMIME_EXPORT extern QString fallbackCharEncoding();
124
125/**
 * Set whether or not to use Outlook-compatible attachment filename encoding. Outlook
 * fails to properly adhere to the RFC2231 standard for parametrized header fields, and
 * instead is only able to read and write attachment filenames encoded in RFC2047-style.
 * This will create mails that are not standards-compliant!
130 *
131 * @param violateStandard Whether or not to use outlook-compatible attachment
132 * filename encodings.
133 *
134 * @since 4.5
135 */
136KMIME_EXPORT extern void setUseOutlookAttachmentEncoding( bool violateStandard );
137
138/**
139 * Retrieve whether or not to use outlook compatible encodings for attachments.
140 */
141KMIME_EXPORT extern bool useOutlookAttachmentEncoding();
142/**
  Decodes string @p src according to RFC2047, i.e., the construct
144 =?charset?[qb]?encoded?=
145
146 @param src source string.
147 @param usedCS the detected charset is returned here
148 @param defaultCS the charset to use in case the detected
149 one isn't known to us.
150 @param forceCS force the use of the default charset.
151
152 @return the decoded string.
153*/
154KMIME_EXPORT extern QString decodeRFC2047String(
155 const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS = QByteArray(),
156 bool forceCS = false );
157
158/** Decode string @p src according to RFC2047 (ie. the
159 =?charset?[qb]?encoded?= construct).
160
161 @param src source string.
162 @return the decoded string.
163*/
164KMIME_EXPORT extern QString decodeRFC2047String( const QByteArray &src );
165
166/**
167 Encodes string @p src according to RFC2047 using charset @p charset.
168
169 This function also makes commas, quotes and other characters part of the encoded name, for example
  the string "Jöhn Döe" <john@example.com> would be encoded as <encoded word for "Jöhn Döe"> <john@example.com>,
171 i.e. the opening and closing quote mark would be part of the encoded word.
172 Therefore don't use this function for input strings that contain semantically meaningful characters,
173 like the quoting marks in this example.
174
175 @param src source string.
176 @param charset charset to use. If it can't encode the string, UTF-8 will be used instead.
177 @param addressHeader if this flag is true, all special chars
178 like <,>,[,],... will be encoded, too.
179 @param allow8bitHeaders if this flag is true, 8Bit headers are allowed.
180
181 @return the encoded string.
182*/
183KMIME_EXPORT extern QByteArray encodeRFC2047String(
184 const QString &src, const QByteArray &charset, bool addressHeader=false,
185 bool allow8bitHeaders=false );
186
187
188/**
189 Decodes string @p src according to RFC2231
190
191 @param src source string.
  @param usedCS the detected charset is returned here
193 @param defaultCS the charset to use in case the detected
194 one isn't known to us.
195 @param forceCS force the use of the default charset.
196
197 @return the decoded string.
198*/
199KMIME_EXPORT extern QString decodeRFC2231String(
200 const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS = QByteArray(),
201 bool forceCS = false );
202
203/** Decode string @p src according to RFC2231 (ie. the
204 charset'lang'encoded construct).
205
206 @param src source string.
207 @return the decoded string.
208*/
209KMIME_EXPORT extern QString decodeRFC2231String( const QByteArray &src );
210
211
212/**
213 Encodes string @p src according to RFC2231 using charset @p charset.
214
215 @param src source string.
216 @param charset charset to use.
217 @return the encoded string.
218*/
219KMIME_EXPORT extern QByteArray encodeRFC2231String( const QString &src, const QByteArray &charset );
220
221/**
222 Uses current time, pid and random numbers to construct a string
223 that aims to be unique on a per-host basis (ie. for the local
  part of a message-id or for multipart boundaries).
225
226 @return the unique string.
227 @see multiPartBoundary
228*/
229KMIME_EXPORT extern QByteArray uniqueString();
230
231/**
232 Constructs a random string (sans leading/trailing "--") that can
233 be used as a multipart delimiter (ie. as @p boundary parameter
234 to a multipart/... content-type).
235
236 @return the randomized string.
237 @see uniqueString
238*/
239KMIME_EXPORT extern QByteArray multiPartBoundary();
240
241/**
242 Unfolds the given header if necessary.
243 @param header The header to unfold.
244*/
245KMIME_EXPORT extern QByteArray unfoldHeader( const QByteArray &header );
246
247/**
248 Tries to extract the header with name @p name from the string
249 @p src, unfolding it if necessary.
250
251 @param src the source string.
252 @param name the name of the header to search for.
253
254 @return the first instance of the header @p name in @p src
  or a null QByteArray if no such header was found.
256*/
257KMIME_EXPORT extern QByteArray extractHeader( const QByteArray &src,
258 const QByteArray &name );
259
260/**
261 Tries to extract the headers with name @p name from the string
262 @p src, unfolding it if necessary.
263
264 @param src the source string.
265 @param name the name of the header to search for.
266
267 @return all instances of the header @p name in @p src
268
269 @since 4.2
270*/
271KMIME_EXPORT extern QList<QByteArray> extractHeaders( const QByteArray &src,
272 const QByteArray &name );
273
274/**
275 Converts all occurrences of "\r\n" (CRLF) in @p s to "\n" (LF).
276
277 This function is expensive and should be used only if the mail
278 will be stored locally. All decode functions can cope with both
279 line endings.
280
281 @param s source string containing CRLF's
282
  @return the string with every CRLF replaced by an LF
284 @see CRLFtoLF(const char*) LFtoCRLF
285*/
286KMIME_EXPORT extern QByteArray CRLFtoLF( const QByteArray &s );
287
288/**
289 Converts all occurrences of "\r\n" (CRLF) in @p s to "\n" (LF).
290
291 This function is expensive and should be used only if the mail
292 will be stored locally. All decode functions can cope with both
293 line endings.
294
295 @param s source string containing CRLF's
296
  @return the string with every CRLF replaced by an LF
  @see CRLFtoLF(const QByteArray&) LFtoCRLF
299*/
300KMIME_EXPORT extern QByteArray CRLFtoLF( const char *s );
301
302/**
303 Converts all occurrences of "\n" (LF) in @p s to "\r\n" (CRLF).
304
305 This function is expensive and should be used only if the mail
306 will be transmitted as an RFC822 message later. All decode
307 functions can cope with and all encode functions can optionally
308 produce both line endings, which is much faster.
309
  @param s source string containing LF's

  @return the string with every LF replaced by a CRLF
  @see CRLFtoLF(const QByteArray&) CRLFtoLF(const char*)
314*/
315KMIME_EXPORT extern QByteArray LFtoCRLF( const QByteArray &s );
316
317/**
318 Removes quote (DQUOTE) characters and decodes "quoted-pairs"
319 (ie. backslash-escaped characters)
320
321 @param str the string to work on.
322 @see addQuotes
323*/
324//AK_REVIEW: add correctly spelled methods and deprecated the wrongly spelled
325// TODO: KDE5: BIC: rename to "removeQuotes"
326KMIME_EXPORT extern void removeQuots( QByteArray &str );
327
328/**
329 Removes quote (DQUOTE) characters and decodes "quoted-pairs"
330 (ie. backslash-escaped characters)
331
332 @param str the string to work on.
333 @see addQuotes
334*/
335//AK_REVIEW: add correctly spelled methods and deprecated the wrongly spelled
336// TODO: KDE5: BIC: rename to "removeQuotes"
337KMIME_EXPORT extern void removeQuots( QString &str );
338
339/**
340 Converts the given string into a quoted-string if the string contains
341 any special characters (ie. one of ()<>@,.;:[]=\").
342
343 @param str us-ascii string to work on.
344 @param forceQuotes if @c true, always add quote characters.
345*/
346KMIME_EXPORT extern void addQuotes( QByteArray &str, bool forceQuotes );
347
348/**
349 * Overloaded method, behaves same as the above.
350 * @param str us-ascii string to work on.
351 * @param forceQuotes if @c true, always add quote characters.
352 * @since 4.5
353 */
354KMIME_EXPORT extern void addQuotes( QString &str, bool forceQuotes );
355
356/**
357 * Makes sure that the bidirectional state at the end of the string is the
358 * same as at the beginning of the string.
359 *
360 * This is useful so that Unicode control characters that can change the text
361 * direction can not spill over to following strings.
362 *
363 * As an example, consider a mailbox in the form "display name" <local@domain.com>.
364 * If the display name here contains unbalanced control characters that change the
365 * text direction, it would also have an effect on the addrspec, which could lead to
366 * spoofing.
367 *
368 * By passing the display name to this function, one can make sure that no change of
369 * the bidi state can spill over to the next strings, in this case the addrspec.
370 *
371 * Example: The string "Hello <RLO>World" is unbalanced, as it contains a right-to-left
372 * override character, which is never followed by a <PDF>, the "pop directional
373 * formatting" character. This function adds the missing <PDF> at the end, and
374 * the output of this function would be "Hello <RLO>World<PDF>".
375 *
376 * Example of spoofing:
377 * Consider "Firstname Lastname<RLO>" <moc.mitciv@attacker.com>. Because of the RLO,
378 * it is displayed as "Firstname Lastname <moc.rekcatta@victim.com>", which spoofs the
379 * domain name.
380 * By passing "Firstname Lastname<RLO>" to this function, one can balance the <RLO>,
381 * leading to "Firstname Lastname<RLO><PDF>", so the whole mailbox is displayed
382 * correctly as "Firstname Lastname" <moc.mitciv@attacker.com> again.
383 *
384 * See http://unicode.org/reports/tr9 for more information on bidi control chars.
385 *
386 * @param input the display name of a mailbox, which is checked for unbalanced Unicode
387 * direction control characters
388 * @return the display name which now contains a balanced state of direction control
389 * characters
390 *
391 * Note that this function does not do any parsing related to mailboxes, it only works
392 * on plain strings. Therefore, passing the complete mailbox will not lead to any results,
393 * only the display name should be passed.
394 *
395 * @since 4.5
396 */
397KMIME_EXPORT QString balanceBidiState( const QString &input );
398
399/**
400 * Similar to the above function. Instead of trying to balance the Bidi chars, it outright
401 * removes them from the string.
402 *
403 * @param input the display name of a mailbox, which is checked for unbalanced Unicode
404 * direction control characters
405 * Reason: KHTML seems to ignore the PDF character, so adding them doesn't fix things :(
406 */
407KMIME_EXPORT QString removeBidiControlChars( const QString &input );
408
409/**
410 * Returns whether or not the given MIME node contains an attachment part. This function will
411 * recursively parse the MIME tree looking for a suitable attachment and return true if one is found.
412 * @param content the MIME node to parse
413 */
414KMIME_EXPORT bool hasAttachment( Content* content );
415
416/**
417 * Returns whether or not the given @p message is partly or fully signed.
418 *
419 * @param message the message to check for being signed
420 * @since 4.6
421 */
422KMIME_EXPORT bool isSigned( Message* message );
423
424/**
425 * Returns whether or not the given @p message is partly or fully encrypted.
426 *
427 * @param message the message to check for being encrypted
428 * @since 4.6
429 */
430KMIME_EXPORT bool isEncrypted( Message* message );
431
432/**
433 * Returns whether or not the given MIME @p content is an invitation
434 * message of the iTIP protocol.
435 *
436 * @since 4.6
437 */
438KMIME_EXPORT bool isInvitation( Content* content );
439
440} // namespace KMime
441
442#endif /* __KMIME_UTIL_H__ */
443