kmime_codecs.h [kdepimlibs/kmime/kmime_codecs.h]

1	/*  -*- c++ -*-
2
3	KMime, the KDE Internet mail/usenet news message library.
4	Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
5
6	This library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Library General Public
8	License as published by the Free Software Foundation; either
9	version 2 of the License, or (at your option) any later version.
10
11	This library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Library General Public License for more details.
15
16	You should have received a copy of the GNU Library General Public License
17	along with this library; see the file COPYING.LIB. If not, write to
18	the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	Boston, MA 02110-1301, USA.
20	*/
21	/**
22	@file
23	This file is part of the API for handling @ref MIME data and
24	defines the Codec class.
25
26	@brief
27	Defines the Codec, Decoder, and Encoder classes.
28
29	@authors Marc Mutz \<mutz@kde.org\>
30
31	@glossary @anchor MIME @anchor mime @b MIME:
32	<b>Multipurpose Internet Mail Extensions</b> or @acronym MIME is an
33	Internet Standard that extends the format of e-mail to support text in
34	character sets other than US-ASCII, non-text attachments, multi-part message
35	bodies, and header information in non-ASCII character sets. Virtually all
36	human-written Internet e-mail and a fairly large proportion of automated
37	e-mail is transmitted via @acronym SMTP in MIME format. Internet e-mail is
38	so closely associated with the SMTP and MIME standards that it is sometimes
39	called SMTP/MIME e-mail. The content types defined by MIME standards are
40	also of growing importance outside of e-mail, such as in communication
41	protocols like @acronym HTTP for the World Wide Web. MIME is also a
42	fundamental component of communication protocols such as HTTP, which
43	requires that data be transmitted in the context of e-mail-like messages,
44	even though the data may not actually be e-mail.
45
46	@glossary @anchor codec @anchor codecs @anchor Codec @anchor Codecs @b codec:
47	a program capable of performing encoding and decoding on a digital data
48	stream. Codecs encode data for storage or encryption and decode it for
49	viewing or editing.
50
51	@glossary @anchor CRLF @b CRLF: a "Carriage Return (0x0D)" followed by a
52	"Line Feed (0x0A)", two ASCII control characters used to represent a
53	newline on some operating systems, notably DOS and Microsoft Windows.
54
55	@glossary @anchor LF @b LF: a "Line Feed (0x0A)" ASCII control character used
56	to represent a newline on some operating systems, notably Unix, Unix-like,
57	and Linux.
58	*/
59
60	#ifndef __KMIME_CODECS__
61	#define __KMIME_CODECS__
62
63	#include <QtCore/QByteArray>
64
65	#include <kdebug.h> // for kFatal()
66
67	#include "kmime_export.h"
68
69	namespace KMime {
70
71	template <class Key, class T> class KAutoDeleteHash;
72
73	class Encoder;
74	class Decoder;
75
76	/**
77	@brief
78	An abstract base class of @ref codecs for common mail transfer encodings.
79
80	Provides an abstract base class of @ref codecs like base64 and quoted-printable.
81	Implemented as a singleton.
82	*/
83	class KMIME_EXPORT Codec
84	{
85	protected:
86	//@cond PRIVATE
87	static KAutoDeleteHash<QByteArray, Codec> *all;
88	static void cleanupCodec();
89	//@endcond
90	/**
91	Contructs the codec.
92	*/
93	Codec() {}
94
95	public:
96	/**
97	Returns a codec associated with the specified @p name.
98
99	@param name points to a character string containing a valid codec name.
100	*/
101	static Codec codecForName( const* char *name );
102
103	/**
104	Returns a codec associated with the specified @p name.
105
106	@param name is a QByteArray containing a valid codec name.
107	*/
108	static Codec codecForName( const* QByteArray &name );
109
110	/**
111	Computes the maximum size, in characters, needed for the encoding.
112
113	@param insize is the number of input characters to be encoded.
114	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
115
116	@return the maximum number of characters in the encoding.
117	*/
118	virtual int maxEncodedSizeFor( int insize, bool withCRLF=false ) const = `0`;
119
120	/**
121	Computes the maximum size, in characters, needed for the deccoding.
122
123	@param insize is the number of input characters to be decoded.
124	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
125
126	@return the maximum number of characters in the decoding.
127	*/
128	virtual int maxDecodedSizeFor( int insize, bool withCRLF=false ) const = `0`;
129
130	/**
131	Creates the encoder for the codec.
132
133	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
134
135	@return a pointer to an instance of the codec's encoder.
136	*/
137	virtual Encoder makeEncoder( bool* withCRLF=false ) const = `0`;
138
139	/**
140	Creates the decoder for the codec.
141
142	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
143
144	@return a pointer to an instance of the codec's decoder.
145	*/
146	virtual Decoder makeDecoder( bool* withCRLF=false ) const = `0`;
147
148	/**
149	Convenience wrapper that can be used for small chunks of data
150	when you can provide a large enough buffer. The default
151	implementation creates an Encoder and uses it.
152
153	Encodes a chunk of bytes starting at @p scursor and extending to
154	@p send into the buffer described by @p dcursor and @p dend.
155
156	This function doesn't support chaining of blocks. The returned
157	block cannot be added to, but you don't need to finalize it, too.
158
159	Example usage (@p in contains the input data):
160	<pre>
161	KMime::Codec codec = KMime::Codec::codecForName( "base64" );*
162	kFatal( !codec ) << "no base64 codec found!?";
163	QByteArray out( in.size()1.4 ); // crude maximal size of b64 encoding*
164	QByteArray::Iterator iit = in.begin();
165	QByteArray::Iterator oit = out.begin();
166	if ( !codec->encode( iit, in.end(), oit, out.end() ) ) {
167	kDebug() << "output buffer too small";
168	return;
169	}
170	kDebug() << "Size of encoded data:" << oit - out.begin();
171	</pre>
172
173	@param scursor is a pointer to the start of the input buffer.
174	@param send is a pointer to the end of the input buffer.
175	@param dcursor is a pointer to the start of the output buffer.
176	@param dend is a pointer to the end of the output buffer.
177	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
178
179	@return false if the encoded data didn't fit into the output buffer;
180	true otherwise.
181	*/
182	virtual bool encode( const char* &scursor, const char * const send,
183	char* &dcursor, const char * const dend,
184	bool withCRLF=false ) const;
185
186	/**
187	Convenience wrapper that can be used for small chunks of data
188	when you can provide a large enough buffer. The default
189	implementation creates a Decoder and uses it.
190
191	Decodes a chunk of bytes starting at @p scursor and extending to
192	@p send into the buffer described by @p dcursor and @p dend.
193
194	This function doesn't support chaining of blocks. The returned
195	block cannot be added to, but you don't need to finalize it, too.
196
197	Example usage (@p in contains the input data):
198	<pre>
199	KMime::Codec codec = KMime::Codec::codecForName( "base64" );*
200	kFatal( !codec ) << "no base64 codec found!?";
201	QByteArray out( in.size() ); // good guess for any encoding...
202	QByteArray::Iterator iit = in.begin();
203	QByteArray::Iterator oit = out.begin();
204	if ( !codec->decode( iit, in.end(), oit, out.end() ) ) {
205	kDebug() << "output buffer too small";
206	return;
207	}
208	kDebug() << "Size of decoded data:" << oit - out.begin();
209	</pre>
210
211	@param scursor is a pointer to the start of the input buffer.
212	@param send is a pointer to the end of the input buffer.
213	@param dcursor is a pointer to the start of the output buffer.
214	@param dend is a pointer to the end of the output buffer.
215	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
216
217	@return false if the decoded data didn't fit into the output buffer;
218	true otherwise.
219	*/
220	virtual bool decode( const char* &scursor, const char * const send,
221	char* &dcursor, const char * const dend,
222	bool withCRLF=false ) const;
223
224	/**
225	Even more convenient, but also a bit slower and more memory
226	intensive, since it allocates storage for the worst case and then
227	shrinks the result QByteArray to the actual size again.
228
229	For use with small @p src.
230
231	@param src is a QByteArray containing the data to encode.
232	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
233	*/
234	virtual QByteArray encode( const QByteArray &src, bool withCRLF=false ) const;
235
236	/**
237	Even more convenient, but also a bit slower and more memory
238	intensive, since it allocates storage for the worst case and then
239	shrinks the result QByteArray to the actual size again.
240
241	For use with small @p src.
242
243	@param src is a QByteArray containing the data to decode.
244	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
245	*/
246	virtual QByteArray decode( const QByteArray &src, bool withCRLF=false ) const;
247
248	/**
249	Returns the name of the encoding. Guaranteed to be lowercase.
250	*/
251	virtual const char name() const* = `0`;
252
253	/**
254	Destroys the codec.
255	*/
256	virtual ~Codec() {}
257
258	private:
259	/**
260	Fills the KAutoDeleteHash with all the supported codecs.
261	*/
262	static void fillDictionary();
263	};
264
/**
  @brief Stateful CTE decoder class

  Stateful decoder class, modelled after QTextDecoder.

  @section Overview

  KMime decoders are designed to be able to process encoded data in
  chunks of arbitrary size and to work with output buffers of also
  arbitrary size. They maintain any state necessary to go on where
  the previous call left off.

  The class consists of only two methods of interest: decode(),
  which decodes an input block, and finish(), which flushes any
  remaining data to the output stream.

  Typically, you will create a decoder instance, call decode() as
  often as necessary, then call finish() (most often a single
  call suffices, but it might be that during that call the output
  buffer is filled, so you should be prepared to call finish()
  as often as necessary, i.e. until it returns @p true).

  @section Return Values

  Both methods return @p true to indicate that they've finished their
  job. For decode(), a return value of @p true means that the
  current input block has been finished (@p false most often means
  that the output buffer is full, but that isn't required
  behavior. The decode() call is free to return at arbitrary
  times during processing).

  For finish(), a return value of @p true means that all data
  implicitly or explicitly stored in the decoder instance has been
  flushed to the output buffer. A @p false return value should be
  interpreted as "check if the output buffer is full and call me
  again", just as with decode().

  @section Usage Pattern

  Since the decoder maintains state, you can only use it once. After
  a sequence of input blocks has been processed, you finish
  the output and then delete the decoder instance. If you want to
  process another input block sequence, you create a new instance.

  Typical usage (@p in contains the (base64-encoded) input data),
  taking into account all the conventions detailed above:

  <pre>
  KMime::Codec *codec = KMime::Codec::codecForName( "base64" );
  kFatal( !codec ) << "No codec found for base64!";
  KMime::Decoder *dec = codec->makeDecoder();
  assert( dec ); // should not happen
  QByteArray out( 256 ); // small buffer is enough ;-)
  QByteArray::Iterator iit = in.begin();
  QByteArray::Iterator oit = out.begin();
  // decode the chunk
  while ( !dec->decode( iit, in.end(), oit, out.end() ) )
    if ( oit == out.end() ) { // output buffer full, process contents
      do_something_with( out );
      oit = out.begin();
    }
  // repeat while loop for each input block
  // ...
  // finish (flush remaining data from decoder):
  while ( !dec->finish( oit, out.end() ) )
    if ( oit == out.end() ) { // output buffer full, process contents
      do_something_with( out );
      oit = out.begin();
    }
  // now process last chunk:
  out.resize( oit - out.begin() );
  do_something_with( out );
  // _delete_ the decoder, but not the codec:
  delete dec;
  </pre>
*/
class Decoder
{
  protected:
    friend class Codec;
    /**
      Protected constructor. Use KMime::Codec::makeDecoder to create an
      instance.

      @param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
    */
    explicit Decoder( bool withCRLF=false )
      : mWithCRLF( withCRLF ) {}

  public:
    /**
      Destroys the decoder.
    */
    virtual ~Decoder() {}

    /**
      Decodes a chunk of data, maintaining state information between
      calls. See the class documentation for calling conventions.

      @param scursor is a pointer to the start of the input buffer.
      @param send is a pointer to the end of the input buffer.
      @param dcursor is a pointer to the start of the output buffer.
      @param dend is a pointer to the end of the output buffer.

      @return true if the current input block has been finished; false
      otherwise (most often because the output buffer is full).
    */
    virtual bool decode( const char* &scursor, const char * const send,
                         char* &dcursor, const char * const dend ) = 0;

    /**
      Call this method to finalize the output stream. Writes all
      remaining data and resets the decoder. See the class documentation
      for calling conventions.

      @param dcursor is a pointer to the start of the output buffer.
      @param dend is a pointer to the end of the output buffer.

      @return true if all data held by the decoder has been flushed to the
      output buffer; false if the call needs to be repeated.
    */
    virtual bool finish( char* &dcursor, const char * const dend ) = 0;

  protected:
    //@cond PRIVATE
    const bool mWithCRLF;
    //@endcond
};
387
388	/**
389	@brief
390	Stateful encoder class.
391
392	Stateful encoder class, modeled after QTextEncoder.
393	*/
394	class Encoder
395	{
396	protected:
397	friend class Codec;
398	/**
399	Protected constructor. Use KMime::Codec::makeEncoder if you want one.
400
401	@param withCRLF if true, make the newlines @ref CRLF; else use @ref LF.
402	*/
403	explicit Encoder( bool withCRLF=false )
404	: mOutputBufferCursor( `0` ), mWithCRLF( withCRLF ) {}
405
406	public:
407	/**
408	Destroys the encoder.
409	*/
410	virtual ~Encoder() {}
411
412	/**
413	Encodes a chunk of data, maintaining state information between
414	calls. See KMime::Codec for calling conventions.
415
416	@param scursor is a pointer to the start of the input buffer.
417	@param send is a pointer to the end of the input buffer.
418	@param dcursor is a pointer to the start of the output buffer.
419	@param dend is a pointer to the end of the output buffer.
420	*/
421	virtual bool encode( const char* &scursor, const char * const send,
422	char* &dcursor, const char * const dend ) = `0`;
423
424	/**
425	Call this method to finalize the output stream. Writes all remaining
426	data and resets the encoder. See KMime::Codec for calling conventions.
427
428	@param dcursor is a pointer to the start of the output buffer.
429	@param dend is a pointer to the end of the output buffer.
430	*/
431	virtual bool finish( char* &dcursor, const char * const dend ) = `0`;
432
433	protected:
434	/**
435	The maximum number of characters permitted in the output buffer.
436	*/
437	enum {
438	maxBufferedChars = `8` /< Eight /*
439	};
440
441	/**
442	Writes character @p ch to the output stream or the output buffer,
443	depending on whether or not the output stream has space left.
444
445	@param ch is the character to write.
446	@param dcursor is a pointer to the start of the output buffer.
447	@param dend is a pointer to the end of the output buffer.
448
449	@return true if written to the output stream; else false if buffered.
450	*/
451	bool write( char ch, char* &dcursor, const char * const dend )
452	{
453	if ( dcursor != dend ) {
454	// if there's space in the output stream, write there:
455	*dcursor++ = ch;
456	return true;
457	} else {
458	// else buffer the output:
459	kFatal( mOutputBufferCursor >= maxBufferedChars )
460	<< "KMime::Encoder: internal buffer overflow!";
461	mOutputBuffer[ mOutputBufferCursor++ ] = ch;
462	return false;
463	}
464	}
465
466	/**
467	Writes characters from the output buffer to the output stream.
468	Implementations of encode and finish should call this
469	at the very beginning and for each iteration of the while loop.
470
471	@param dcursor is a pointer to the start of the output buffer.
472	@param dend is a pointer to the end of the output buffer.
473
474	@return true if all chars could be written, false otherwise
475	*/
476	bool flushOutputBuffer( char* &dcursor, const char * const dend );
477
478	/**
479	Convenience function. Outputs @ref LF or @ref CRLF, based on the
480	state of mWithCRLF.
481
482	@param dcursor is a pointer to the start of the output buffer.
483	@param dend is a pointer to the end of the output buffer.
484	*/
485	bool writeCRLF( char* &dcursor, const char * const dend )
486	{
487	if ( mWithCRLF ) {
488	write( '\r', dcursor, dend );
489	}
490	return write( '\n', dcursor, dend );
491	}
492
493	private:
494	/**
495	An output buffer to simplify some codecs.
496	Used with write() and flushOutputBuffer().
497	*/
498	//@cond PRIVATE
499	char mOutputBuffer[ maxBufferedChars ];
500	//@endcond
501
502	protected:
503	//@cond PRIVATE
504	uchar mOutputBufferCursor;
505	const bool mWithCRLF;
506	//@endcond
507	};
508
509	} // namespace KMime
510
511	#endif // __KMIME_CODECS__
512