1 | /* -*- c++ -*- |
2 | |
3 | KMime, the KDE Internet mail/usenet news message library. |
4 | Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org> |
5 | |
6 | This library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Library General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2 of the License, or (at your option) any later version. |
10 | |
11 | This library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Library General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Library General Public License |
17 | along with this library; see the file COPYING.LIB. If not, write to |
18 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | Boston, MA 02110-1301, USA. |
20 | */ |
21 | /** |
22 | @file |
23 | This file is part of the API for handling @ref MIME data and |
24 | defines the Codec class. |
25 | |
26 | @brief |
  Defines the Codec, Decoder and Encoder classes.
28 | |
29 | @authors Marc Mutz \<mutz@kde.org\> |
30 | |
31 | @glossary @anchor MIME @anchor mime @b MIME: |
32 | <b>Multipurpose Internet Mail Extensions</b> or @acronym MIME is an |
33 | Internet Standard that extends the format of e-mail to support text in |
34 | character sets other than US-ASCII, non-text attachments, multi-part message |
35 | bodies, and header information in non-ASCII character sets. Virtually all |
36 | human-written Internet e-mail and a fairly large proportion of automated |
37 | e-mail is transmitted via @acronym SMTP in MIME format. Internet e-mail is |
38 | so closely associated with the SMTP and MIME standards that it is sometimes |
39 | called SMTP/MIME e-mail. The content types defined by MIME standards are |
40 | also of growing importance outside of e-mail, such as in communication |
41 | protocols like @acronym HTTP for the World Wide Web. MIME is also a |
42 | fundamental component of communication protocols such as HTTP, which |
43 | requires that data be transmitted in the context of e-mail-like messages, |
44 | even though the data may not actually be e-mail. |
45 | |
46 | @glossary @anchor codec @anchor codecs @anchor Codec @anchor Codecs @b codec: |
47 | a program capable of performing encoding and decoding on a digital data |
48 | stream. Codecs encode data for storage or encryption and decode it for |
49 | viewing or editing. |
50 | |
51 | @glossary @anchor CRLF @b CRLF: a "Carriage Return (0x0D)" followed by a |
52 | "Line Feed (0x0A)", two ASCII control characters used to represent a |
53 | newline on some operating systems, notably DOS and Microsoft Windows. |
54 | |
55 | @glossary @anchor LF @b LF: a "Line Feed (0x0A)" ASCII control character used |
56 | to represent a newline on some operating systems, notably Unix, Unix-like, |
57 | and Linux. |
58 | */ |
59 | |
60 | #ifndef __KMIME_CODECS__ |
61 | #define __KMIME_CODECS__ |
62 | |
63 | #include <QtCore/QByteArray> |
64 | |
65 | #include <kdebug.h> // for kFatal() |
66 | |
67 | #include "kmime_export.h" |
68 | |
69 | namespace KMime { |
70 | |
71 | template <class Key, class T> class KAutoDeleteHash; |
72 | |
73 | class Encoder; |
74 | class Decoder; |
75 | |
76 | /** |
77 | @brief |
78 | An abstract base class of @ref codecs for common mail transfer encodings. |
79 | |
80 | Provides an abstract base class of @ref codecs like base64 and quoted-printable. |
81 | Implemented as a singleton. |
82 | */ |
83 | class KMIME_EXPORT Codec |
84 | { |
85 | protected: |
86 | //@cond PRIVATE |
87 | static KAutoDeleteHash<QByteArray, Codec> *all; |
88 | static void cleanupCodec(); |
89 | //@endcond |
90 | /** |
91 | Contructs the codec. |
92 | */ |
93 | Codec() {} |
94 | |
95 | public: |
96 | /** |
97 | Returns a codec associated with the specified @p name. |
98 | |
99 | @param name points to a character string containing a valid codec name. |
100 | */ |
101 | static Codec *codecForName( const char *name ); |
102 | |
103 | /** |
104 | Returns a codec associated with the specified @p name. |
105 | |
106 | @param name is a QByteArray containing a valid codec name. |
107 | */ |
108 | static Codec *codecForName( const QByteArray &name ); |
109 | |
110 | /** |
111 | Computes the maximum size, in characters, needed for the encoding. |
112 | |
113 | @param insize is the number of input characters to be encoded. |
114 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
115 | |
116 | @return the maximum number of characters in the encoding. |
117 | */ |
118 | virtual int maxEncodedSizeFor( int insize, bool withCRLF=false ) const = 0; |
119 | |
120 | /** |
121 | Computes the maximum size, in characters, needed for the deccoding. |
122 | |
123 | @param insize is the number of input characters to be decoded. |
124 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
125 | |
126 | @return the maximum number of characters in the decoding. |
127 | */ |
128 | virtual int maxDecodedSizeFor( int insize, bool withCRLF=false ) const = 0; |
129 | |
130 | /** |
131 | Creates the encoder for the codec. |
132 | |
133 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
134 | |
135 | @return a pointer to an instance of the codec's encoder. |
136 | */ |
137 | virtual Encoder *makeEncoder( bool withCRLF=false ) const = 0; |
138 | |
139 | /** |
140 | Creates the decoder for the codec. |
141 | |
142 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
143 | |
144 | @return a pointer to an instance of the codec's decoder. |
145 | */ |
146 | virtual Decoder *makeDecoder( bool withCRLF=false ) const = 0; |
147 | |
148 | /** |
149 | Convenience wrapper that can be used for small chunks of data |
150 | when you can provide a large enough buffer. The default |
151 | implementation creates an Encoder and uses it. |
152 | |
153 | Encodes a chunk of bytes starting at @p scursor and extending to |
154 | @p send into the buffer described by @p dcursor and @p dend. |
155 | |
156 | This function doesn't support chaining of blocks. The returned |
157 | block cannot be added to, but you don't need to finalize it, too. |
158 | |
159 | Example usage (@p in contains the input data): |
160 | <pre> |
161 | KMime::Codec *codec = KMime::Codec::codecForName( "base64" ); |
162 | kFatal( !codec ) << "no base64 codec found!?"; |
163 | QByteArray out( in.size()*1.4 ); // crude maximal size of b64 encoding |
164 | QByteArray::Iterator iit = in.begin(); |
165 | QByteArray::Iterator oit = out.begin(); |
166 | if ( !codec->encode( iit, in.end(), oit, out.end() ) ) { |
167 | kDebug() << "output buffer too small"; |
168 | return; |
169 | } |
170 | kDebug() << "Size of encoded data:" << oit - out.begin(); |
171 | </pre> |
172 | |
173 | @param scursor is a pointer to the start of the input buffer. |
174 | @param send is a pointer to the end of the input buffer. |
175 | @param dcursor is a pointer to the start of the output buffer. |
176 | @param dend is a pointer to the end of the output buffer. |
177 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
178 | |
179 | @return false if the encoded data didn't fit into the output buffer; |
180 | true otherwise. |
181 | */ |
182 | virtual bool encode( const char* &scursor, const char * const send, |
183 | char* &dcursor, const char * const dend, |
184 | bool withCRLF=false ) const; |
185 | |
186 | /** |
187 | Convenience wrapper that can be used for small chunks of data |
188 | when you can provide a large enough buffer. The default |
189 | implementation creates a Decoder and uses it. |
190 | |
191 | Decodes a chunk of bytes starting at @p scursor and extending to |
192 | @p send into the buffer described by @p dcursor and @p dend. |
193 | |
194 | This function doesn't support chaining of blocks. The returned |
195 | block cannot be added to, but you don't need to finalize it, too. |
196 | |
197 | Example usage (@p in contains the input data): |
198 | <pre> |
199 | KMime::Codec *codec = KMime::Codec::codecForName( "base64" ); |
200 | kFatal( !codec ) << "no base64 codec found!?"; |
201 | QByteArray out( in.size() ); // good guess for any encoding... |
202 | QByteArray::Iterator iit = in.begin(); |
203 | QByteArray::Iterator oit = out.begin(); |
204 | if ( !codec->decode( iit, in.end(), oit, out.end() ) ) { |
205 | kDebug() << "output buffer too small"; |
206 | return; |
207 | } |
208 | kDebug() << "Size of decoded data:" << oit - out.begin(); |
209 | </pre> |
210 | |
211 | @param scursor is a pointer to the start of the input buffer. |
212 | @param send is a pointer to the end of the input buffer. |
213 | @param dcursor is a pointer to the start of the output buffer. |
214 | @param dend is a pointer to the end of the output buffer. |
215 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
216 | |
217 | @return false if the decoded data didn't fit into the output buffer; |
218 | true otherwise. |
219 | */ |
220 | virtual bool decode( const char* &scursor, const char * const send, |
221 | char* &dcursor, const char * const dend, |
222 | bool withCRLF=false ) const; |
223 | |
224 | /** |
225 | Even more convenient, but also a bit slower and more memory |
226 | intensive, since it allocates storage for the worst case and then |
227 | shrinks the result QByteArray to the actual size again. |
228 | |
229 | For use with small @p src. |
230 | |
231 | @param src is a QByteArray containing the data to encode. |
232 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
233 | */ |
234 | virtual QByteArray encode( const QByteArray &src, bool withCRLF=false ) const; |
235 | |
236 | /** |
237 | Even more convenient, but also a bit slower and more memory |
238 | intensive, since it allocates storage for the worst case and then |
239 | shrinks the result QByteArray to the actual size again. |
240 | |
241 | For use with small @p src. |
242 | |
243 | @param src is a QByteArray containing the data to decode. |
244 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
245 | */ |
246 | virtual QByteArray decode( const QByteArray &src, bool withCRLF=false ) const; |
247 | |
248 | /** |
249 | Returns the name of the encoding. Guaranteed to be lowercase. |
250 | */ |
251 | virtual const char *name() const = 0; |
252 | |
253 | /** |
254 | Destroys the codec. |
255 | */ |
256 | virtual ~Codec() {} |
257 | |
258 | private: |
259 | /** |
260 | Fills the KAutoDeleteHash with all the supported codecs. |
261 | */ |
262 | static void fillDictionary(); |
263 | }; |
264 | |
265 | /** |
266 | @brief Stateful CTE decoder class |
267 | |
268 | Stateful decoder class, modelled after QTextDecoder. |
269 | |
270 | @section Overview |
271 | |
272 | KMime decoders are designed to be able to process encoded data in |
273 | chunks of arbitrary size and to work with output buffers of also |
274 | arbitrary size. They maintain any state necessary to go on where |
275 | the previous call left off. |
276 | |
  The class consists of only two methods of interest: decode(),
  which decodes an input block, and finish(), which flushes any
  remaining data to the output stream.
280 | |
  Typically, you will create a decoder instance, call decode() as
  often as necessary, then call finish() (most often a single
  call suffices, but it might be that during that call the output
  buffer is filled, so you should be prepared to call finish()
  as often as necessary, i.e. until it returns @p true).
286 | |
287 | @section Return Values |
288 | |
289 | Both methods return @p true to indicate that they've finished their |
290 | job. For decode, a return value of @p true means that the |
291 | current input block has been finished (@p false most often means |
292 | that the output buffer is full, but that isn't required |
293 | behavior. The decode call is free to return at arbitrary |
294 | times during processing). |
295 | |
  For finish(), a return value of @p true means that all data
  implicitly or explicitly stored in the decoder instance has been
  flushed to the output buffer. A @p false return value should be
  interpreted as "check if the output buffer is full and call me
  again", just as with decode().
301 | |
302 | @section Usage Pattern |
303 | |
304 | Since the decoder maintains state, you can only use it once. After |
305 | a sequence of input blocks has been processed, you finalize |
306 | the output and then delete the decoder instance. If you want to |
307 | process another input block sequence, you create a new instance. |
308 | |
309 | Typical usage (@p in contains the (base64-encoded) input data), |
310 | taking into account all the conventions detailed above: |
311 | |
312 | <pre> |
313 | KMime::Codec *codec = KMime::Codec::codecForName( "base64" ); |
314 | kFatal( !codec ) << "No codec found for base64!"; |
315 | KMime::Decoder *dec = codec->makeDecoder(); |
316 | assert( dec ); // should not happen |
317 | QByteArray out( 256 ); // small buffer is enough ;-) |
318 | QByteArray::Iterator iit = in.begin(); |
319 | QByteArray::Iterator oit = out.begin(); |
320 | // decode the chunk |
321 | while ( !dec->decode( iit, in.end(), oit, out.end() ) ) |
322 | if ( oit == out.end() ) { // output buffer full, process contents |
323 | do_something_with( out ); |
324 | oit = out.begin(); |
325 | } |
326 | // repeat while loop for each input block |
327 | // ... |
328 | // finish (flush remaining data from decoder): |
329 | while ( !dec->finish( oit, out.end() ) ) |
330 | if ( oit == out.end() ) { // output buffer full, process contents |
331 | do_something_with( out ); |
332 | oit = out.begin(); |
333 | } |
334 | // now process last chunk: |
335 | out.resize( oit - out.begin() ); |
336 | do_something_with( out ); |
337 | // _delete_ the decoder, but not the codec: |
338 | delete dec; |
339 | </pre> |
340 | */ |
class Decoder
{
  protected:
    friend class Codec;

    /**
      Protected constructor. Use KMime::Codec::makeDecoder to create an
      instance.

      Declared explicit, like the Encoder constructor, so that a bare
      bool is never implicitly converted into a decoder.

      @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
    */
    explicit Decoder( bool withCRLF=false )
      : mWithCRLF( withCRLF ) {}

  public:
    /**
      Destroys the decoder.
    */
    virtual ~Decoder() {}

    /**
      Decodes a chunk of data, maintaining state information between
      calls. See the class documentation for calling conventions.

      Both cursors are passed by reference so that the implementation
      can advance them as input is consumed and output is produced.

      @param scursor is a pointer to the start of the input buffer.
      @param send is a pointer to the end of the input buffer.
      @param dcursor is a pointer to the start of the output buffer.
      @param dend is a pointer to the end of the output buffer.

      @return true if the current input block has been finished; false
      otherwise (most often because the output buffer is full).
    */
    virtual bool decode( const char* &scursor, const char * const send,
                         char* &dcursor, const char * const dend ) = 0;

    /**
      Call this method to finalize the output stream. Writes all
      remaining data and resets the decoder. See the class
      documentation for calling conventions.

      @param dcursor is a pointer to the start of the output buffer.
      @param dend is a pointer to the end of the output buffer.

      @return true if all data stored in the decoder has been flushed
      to the output buffer; false if the call has to be repeated.
    */
    virtual bool finish( char* &dcursor, const char * const dend ) = 0;

  protected:
    //@cond PRIVATE
    /// Newline convention selected at construction: true for CRLF, false for LF.
    const bool mWithCRLF;
    //@endcond
};
387 | |
388 | /** |
389 | @brief |
390 | Stateful encoder class. |
391 | |
392 | Stateful encoder class, modeled after QTextEncoder. |
393 | */ |
394 | class Encoder |
395 | { |
396 | protected: |
397 | friend class Codec; |
398 | /** |
399 | Protected constructor. Use KMime::Codec::makeEncoder if you want one. |
400 | |
401 | @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF. |
402 | */ |
403 | explicit Encoder( bool withCRLF=false ) |
404 | : mOutputBufferCursor( 0 ), mWithCRLF( withCRLF ) {} |
405 | |
406 | public: |
407 | /** |
408 | Destroys the encoder. |
409 | */ |
410 | virtual ~Encoder() {} |
411 | |
412 | /** |
413 | Encodes a chunk of data, maintaining state information between |
414 | calls. See KMime::Codec for calling conventions. |
415 | |
416 | @param scursor is a pointer to the start of the input buffer. |
417 | @param send is a pointer to the end of the input buffer. |
418 | @param dcursor is a pointer to the start of the output buffer. |
419 | @param dend is a pointer to the end of the output buffer. |
420 | */ |
421 | virtual bool encode( const char* &scursor, const char * const send, |
422 | char* &dcursor, const char * const dend ) = 0; |
423 | |
424 | /** |
425 | Call this method to finalize the output stream. Writes all remaining |
426 | data and resets the encoder. See KMime::Codec for calling conventions. |
427 | |
428 | @param dcursor is a pointer to the start of the output buffer. |
429 | @param dend is a pointer to the end of the output buffer. |
430 | */ |
431 | virtual bool finish( char* &dcursor, const char * const dend ) = 0; |
432 | |
433 | protected: |
434 | /** |
435 | The maximum number of characters permitted in the output buffer. |
436 | */ |
437 | enum { |
438 | maxBufferedChars = 8 /**< Eight */ |
439 | }; |
440 | |
441 | /** |
442 | Writes character @p ch to the output stream or the output buffer, |
443 | depending on whether or not the output stream has space left. |
444 | |
445 | @param ch is the character to write. |
446 | @param dcursor is a pointer to the start of the output buffer. |
447 | @param dend is a pointer to the end of the output buffer. |
448 | |
449 | @return true if written to the output stream; else false if buffered. |
450 | */ |
451 | bool write( char ch, char* &dcursor, const char * const dend ) |
452 | { |
453 | if ( dcursor != dend ) { |
454 | // if there's space in the output stream, write there: |
455 | *dcursor++ = ch; |
456 | return true; |
457 | } else { |
458 | // else buffer the output: |
459 | kFatal( mOutputBufferCursor >= maxBufferedChars ) |
460 | << "KMime::Encoder: internal buffer overflow!" ; |
461 | mOutputBuffer[ mOutputBufferCursor++ ] = ch; |
462 | return false; |
463 | } |
464 | } |
465 | |
466 | /** |
467 | Writes characters from the output buffer to the output stream. |
468 | Implementations of encode and finish should call this |
469 | at the very beginning and for each iteration of the while loop. |
470 | |
471 | @param dcursor is a pointer to the start of the output buffer. |
472 | @param dend is a pointer to the end of the output buffer. |
473 | |
474 | @return true if all chars could be written, false otherwise |
475 | */ |
476 | bool flushOutputBuffer( char* &dcursor, const char * const dend ); |
477 | |
478 | /** |
479 | Convenience function. Outputs @ref LF or @ref CRLF, based on the |
480 | state of mWithCRLF. |
481 | |
482 | @param dcursor is a pointer to the start of the output buffer. |
483 | @param dend is a pointer to the end of the output buffer. |
484 | */ |
485 | bool writeCRLF( char* &dcursor, const char * const dend ) |
486 | { |
487 | if ( mWithCRLF ) { |
488 | write( '\r', dcursor, dend ); |
489 | } |
490 | return write( '\n', dcursor, dend ); |
491 | } |
492 | |
493 | private: |
494 | /** |
495 | An output buffer to simplify some codecs. |
496 | Used with write() and flushOutputBuffer(). |
497 | */ |
498 | //@cond PRIVATE |
499 | char mOutputBuffer[ maxBufferedChars ]; |
500 | //@endcond |
501 | |
502 | protected: |
503 | //@cond PRIVATE |
504 | uchar mOutputBufferCursor; |
505 | const bool mWithCRLF; |
506 | //@endcond |
507 | }; |
508 | |
509 | } // namespace KMime |
510 | |
511 | #endif // __KMIME_CODECS__ |
512 | |