kmime_codec_base64.cpp [kdepimlibs/kmime/kmime_codec_base64.cpp]

/*  -*- c++ -*-
2	kmime_codec_base64.cpp
3
4	KMime, the KDE Internet mail/usenet news message library.
5	Copyright (c) 2001 Marc Mutz <mutz@kde.org>
6
7	This library is free software; you can redistribute it and/or
8	modify it under the terms of the GNU Library General Public
9	License as published by the Free Software Foundation; either
10	version 2 of the License, or (at your option) any later version.
11
12	This library is distributed in the hope that it will be useful,
13	but WITHOUT ANY WARRANTY; without even the implied warranty of
14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15	Library General Public License for more details.
16
17	You should have received a copy of the GNU Library General Public License
18	along with this library; see the file COPYING.LIB. If not, write to
19	the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20	Boston, MA 02110-1301, USA.
21	*/
/**
  @file
  This file is part of the API for handling @ref MIME data and
  defines the @ref Base64 and @ref RFC2047B @ref Codec classes.

  @brief
  Defines the Base64Codec and Rfc2047BEncodingCodec classes.

  @authors Marc Mutz \<mutz@kde.org\>
*/
32
33	#include "kmime_codec_base64.h"
34
35	#include <kdebug.h>
36
37	#include <cassert>
38
39	using namespace KMime;
40
41	namespace KMime {
42
43	// codec for base64 as specified in RFC 2045
44	//class Base64Codec;
45	//class Base64Decoder;
46	//class Base64Encoder;
47
48	// codec for the B encoding as specified in RFC 2047
49	//class Rfc2047BEncodingCodec;
50	//class Rfc2047BEncodingEncoder;
51	//class Rfc2047BEncodingDecoder;
52
53	//@cond PRIVATE
54	static const uchar base64DecodeMap[`128`] = {
55	`64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`,
56	`64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`,
57
58	`64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `64`, `62`, `64`, `64`, `64`, `63`,
59	`52`, `53`, `54`, `55`, `56`, `57`, `58`, `59`, `60`, `61`, `64`, `64`, `64`, `64`, `64`, `64`,
60
61	`64`, `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`,
62	`15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `64`, `64`, `64`, `64`, `64`,
63
64	`64`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `33`, `34`, `35`, `36`, `37`, `38`, `39`, `40`,
65	`41`, `42`, `43`, `44`, `45`, `46`, `47`, `48`, `49`, `50`, `51`, `64`, `64`, `64`, `64`, `64`
66	};
67
// Lookup table from a 6-bit value (0..63) to the base64 alphabet
// character that represents it: 'A'-'Z', 'a'-'z', '0'-'9', '+', '/'.
static const char base64EncodeMap[64] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
  'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
  'w', 'x', 'y', 'z', '0', '1', '2', '3',
  '4', '5', '6', '7', '8', '9', '+', '/'
};
78	//@endcond
79
80	class Base64Decoder : public Decoder
81	{
82	uint mStepNo;
83	uchar mOutbits;
84	bool mSawPadding : `1`;
85
86	protected:
87	friend class Base64Codec;
88	Base64Decoder( bool withCRLF=false )
89	: Decoder ( withCRLF ), mStepNo( `0` ), mOutbits( `0` ),
90	mSawPadding( false ) {}
91
92	public:
93	virtual ~Base64Decoder() {}
94
95	bool decode( const char* &scursor, const char * const send,
96	char* &dcursor, const char * const dend );
97	// ### really needs no finishing???
98	bool finish( char* &dcursor, const char * const dend )
99	{
100	Q_UNUSED( dcursor ); Q_UNUSED( dend );
101	return true;
102	}
103	};
104
105	class Base64Encoder : public Encoder
106	{
107	uint mStepNo;
108	/* number of already written base64-quartets on current line /
109	uint mWrittenPacketsOnThisLine;
110	uchar mNextbits;
111	bool mInsideFinishing : `1`;
112
113	protected:
114	friend class Rfc2047BEncodingCodec;
115	friend class Rfc2047BEncodingEncoder;
116	friend class Base64Codec;
117	Base64Encoder( bool withCRLF=false )
118	: Encoder ( withCRLF ), mStepNo( `0` ), mWrittenPacketsOnThisLine( `0` ),
119	mNextbits( `0` ), mInsideFinishing( false ) {}
120
121	bool generic_finish( char* &dcursor, const char * const dend,
122	bool withLFatEnd );
123
124	public:
125	virtual ~Base64Encoder() {}
126
127	bool encode( const char* &scursor, const char * const send,
128	char* &dcursor, const char * const dend );
129
130	bool finish( char* &dcursor, const char * const dend );
131
132	protected:
133	bool writeBase64( uchar ch, char* &dcursor, const char * const dend )
134	{ return write( base64EncodeMap[ ch ], dcursor, dend ); }
135	};
136
137	class Rfc2047BEncodingEncoder : public Base64Encoder
138	{
139	protected:
140	friend class Rfc2047BEncodingCodec;
141	Rfc2047BEncodingEncoder( bool withCRLF=false )
142	: Base64Encoder ( withCRLF ) {}
143
144	public:
145	bool encode( const char* &scursor, const char * const send,
146	char* &dcursor, const char * const dend );
147	bool finish( char* &dcursor, const char * const dend );
148	};
149
150	Encoder Base64Codec::makeEncoder( bool* withCRLF ) const
151	{
152	return new Base64Encoder ( withCRLF );
153	}
154
155	Decoder Base64Codec::makeDecoder( bool* withCRLF ) const
156	{
157	return new Base64Decoder ( withCRLF );
158	}
159
160	Encoder Rfc2047BEncodingCodec::makeEncoder( bool* withCRLF ) const
161	{
162	return new Rfc2047BEncodingEncoder ( withCRLF );
163	}
164
165	/******************************************************/
166	/******************************************************/
167	/******************************************************/
168
169	bool Base64Decoder::decode( const char* &scursor, const char * const send,
170	char* &dcursor, const char * const dend )
171	{
172	while ( dcursor != dend && scursor != send ) {
173	uchar ch = *scursor++;
174	uchar value;
175
176	// try converting ch to a 6-bit value:
177	if ( ch < `128` ) {
178	value = base64DecodeMap[ ch ];
179	} else {
180	value = `64`;
181	}
182
183	// ch isn't of the base64 alphabet, check for other significant chars:
184	if ( value >= `64` ) {
185	if ( ch == '=' ) {
186	// padding:
187	if ( mStepNo == `0` \|\| mStepNo == `1` ) {
188	if ( !mSawPadding ) {
189	// malformed
190	kWarning () << "Base64Decoder: unexpected padding"
191	"character in input stream";
192	}
193	mSawPadding = true;
194	break;
195	} else if ( mStepNo == `2` ) {
196	// ok, there should be another one
197	} else if ( mStepNo == `3` ) {
198	// ok, end of encoded stream
199	mSawPadding = true;
200	break;
201	}
202	mSawPadding = true;
203	mStepNo = ( mStepNo + `1` ) % `4`;
204	continue;
205	} else {
206	// non-base64 alphabet
207	continue;
208	}
209	}
210
211	if ( mSawPadding ) {
212	kWarning () << "Base64Decoder: Embedded padding character"
213	"encountered!";
214	return true;
215	}
216
217	// add the new bits to the output stream and flush full octets:
218	switch ( mStepNo ) {
219	case `0`:
220	mOutbits = value << `2`;
221	break;
222	case `1`:
223	dcursor++ = (char*)( mOutbits \| value >> `4` );
224	mOutbits = value << `4`;
225	break;
226	case `2`:
227	dcursor++ = (char*)( mOutbits \| value >> `2` );
228	mOutbits = value << `6`;
229	break;
230	case `3`:
231	dcursor++ = (char*)( mOutbits \| value );
232	mOutbits = `0`;
233	break;
234	default:
235	assert( `0` );
236	}
237	mStepNo = ( mStepNo + `1` ) % `4`;
238	}
239
240	// return false when caller should call us again:
241	return scursor == send;
242	} // Base64Decoder::decode()
243
244	bool Base64Encoder::encode( const char* &scursor, const char * const send,
245	char* &dcursor, const char * const dend )
246	{
247	const uint maxPacketsPerLine = `76` / `4`;
248
249	// detect when the caller doesn't adhere to our rules:
250	if ( mInsideFinishing ) {
251	return true;
252	}
253
254	while ( scursor != send && dcursor != dend ) {
255	// properly empty the output buffer before starting something new:
256	// ### fixme: we can optimize this away, since the buffer isn't
257	// written to anyway (most of the time)
258	if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
259	return scursor == send;
260	}
261
262	uchar ch = *scursor++;
263	// mNextbits // (part of) value of next sextet
264
265	// check for line length;
266	if ( mStepNo == `0` && mWrittenPacketsOnThisLine >= maxPacketsPerLine ) {
267	writeCRLF( dcursor, dend );
268	mWrittenPacketsOnThisLine = `0`;
269	}
270
271	// depending on mStepNo, extract value and mNextbits from the
272	// octet stream:
273	switch ( mStepNo ) {
274	case `0`:
275	assert( mNextbits == `0` );
276	writeBase64( ch >> `2`, dcursor, dend ); // top-most 6 bits -> output
277	mNextbits = ( ch & `0x3` ) << `4`; // 0..1 bits -> 4..5 in mNextbits
278	break;
279	case `1`:
280	assert( ( mNextbits & ~`0x30` ) == `0` );
281	writeBase64( mNextbits \| ch >> `4`, dcursor, dend ); // 4..7 bits -> 0..3 in value
282	mNextbits = ( ch & `0xf` ) << `2`; // 0..3 bits -> 2..5 in mNextbits
283	break;
284	case `2`:
285	assert( ( mNextbits & ~`0x3C` ) == `0` );
286	writeBase64( mNextbits \| ch >> `6`, dcursor, dend ); // 6..7 bits -> 0..1 in value
287	writeBase64( ch & `0x3F`, dcursor, dend ); // 0..5 bits -> output
288	mNextbits = `0`;
289	mWrittenPacketsOnThisLine++;
290	break;
291	default:
292	assert( `0` );
293	}
294	mStepNo = ( mStepNo + `1` ) % `3`;
295	}
296
297	if ( mOutputBufferCursor ) {
298	flushOutputBuffer( dcursor, dend );
299	}
300
301	return scursor == send;
302	}
303
304	bool Rfc2047BEncodingEncoder::encode( const char* &scursor,
305	const char * const send,
306	char* &dcursor,
307	const char * const dend )
308	{
309	// detect when the caller doesn't adhere to our rules:
310	if ( mInsideFinishing ) {
311	return true;
312	}
313
314	while ( scursor != send && dcursor != dend ) {
315	// properly empty the output buffer before starting something new:
316	// ### fixme: we can optimize this away, since the buffer isn't
317	// written to anyway (most of the time)
318	if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
319	return scursor == send;
320	}
321
322	uchar ch = *scursor++;
323	// mNextbits // (part of) value of next sextet
324
325	// depending on mStepNo, extract value and mNextbits from the
326	// octet stream:
327	switch ( mStepNo ) {
328	case `0`:
329	assert( mNextbits == `0` );
330	writeBase64( ch >> `2`, dcursor, dend ); // top-most 6 bits -> output
331	mNextbits = ( ch & `0x3` ) << `4`; // 0..1 bits -> 4..5 in mNextbits
332	break;
333	case `1`:
334	assert( ( mNextbits & ~`0x30` ) == `0` );
335	writeBase64( mNextbits \| ch >> `4`, dcursor, dend ); // 4..7 bits -> 0..3 in value
336	mNextbits = ( ch & `0xf` ) << `2`; // 0..3 bits -> 2..5 in mNextbits
337	break;
338	case `2`:
339	assert( ( mNextbits & ~`0x3C` ) == `0` );
340	writeBase64( mNextbits \| ch >> `6`, dcursor, dend ); // 6..7 bits -> 0..1 in value
341	writeBase64( ch & `0x3F`, dcursor, dend ); // 0..5 bits -> output
342	mNextbits = `0`;
343	break;
344	default:
345	assert( `0` );
346	}
347	mStepNo = ( mStepNo + `1` ) % `3`;
348	}
349
350	if ( mOutputBufferCursor ) {
351	flushOutputBuffer( dcursor, dend );
352	}
353
354	return scursor == send;
355	}
356
357	bool Base64Encoder::finish( char* &dcursor, const char * const dend )
358	{
359	return generic_finish( dcursor, dend, true );
360	}
361
362	bool Rfc2047BEncodingEncoder::finish( char* & dcursor,
363	const char * const dend )
364	{
365	return generic_finish( dcursor, dend, false );
366	}
367
368	bool Base64Encoder::generic_finish( char* &dcursor, const char * const dend,
369	bool withLFatEnd )
370	{
371	if ( mInsideFinishing ) {
372	return flushOutputBuffer( dcursor, dend );
373	}
374
375	if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
376	return false;
377	}
378
379	mInsideFinishing = true;
380
381	//
382	// writing out the last mNextbits...
383	//
384	switch ( mStepNo ) {
385	case `1`: // 2 mNextbits waiting to be written. Needs two padding chars:
386	case `2`: // 4 or 6 mNextbits waiting to be written. Completes a block
387	writeBase64( mNextbits, dcursor, dend );
388	mNextbits = `0`;
389	break;
390	case `0`: // no padding, nothing to be written, except possibly the CRLF
391	assert( mNextbits == `0` );
392	break;
393	default:
394	assert( `0` );
395	}
396
397	//
398	// adding padding...
399	//
400	switch ( mStepNo ) {
401	case `1`:
402	write( '=', dcursor, dend );
403	// fall through:
404	case `2`:
405	write( '=', dcursor, dend );
406	// fall through:
407	case `0`: // completed an quartet - add CRLF
408	if ( withLFatEnd ) {
409	writeCRLF( dcursor, dend );
410	}
411	return flushOutputBuffer( dcursor, dend );
412	default:
413	assert( `0` );
414	}
415	return true; // asserts get compiled out
416	}
417
418	} // namespace KMime
419