1/* -*- c++ -*-
2 kmime_codec_base64.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 Copyright (c) 2001 Marc Mutz <mutz@kde.org>
6
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
16
17 You should have received a copy of the GNU Library General Public License
18 along with this library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA.
21*/
22/**
23 @file
24 This file is part of the API for handling @ref MIME data and
25 defines the @ref Base64 and @ref RFC2047B @ref Codec classes.
26
27 @brief
28 Defines the Base64Codec and Rfc2047BEncodingCodec classes.
29
30 @authors Marc Mutz \<mutz@kde.org\>
31*/
32
33#include "kmime_codec_base64.h"
34
35#include <kdebug.h>
36
37#include <cassert>
38
39using namespace KMime;
40
41namespace KMime {
42
43// codec for base64 as specified in RFC 2045
44//class Base64Codec;
45//class Base64Decoder;
46//class Base64Encoder;
47
48// codec for the B encoding as specified in RFC 2047
49//class Rfc2047BEncodingCodec;
50//class Rfc2047BEncodingEncoder;
51//class Rfc2047BEncodingDecoder;
52
53//@cond PRIVATE
54static const uchar base64DecodeMap[128] = {
55 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
56 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
57
58 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 62, 64, 64, 64, 63,
59 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 64, 64, 64, 64, 64, 64,
60
61 64, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
62 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 64, 64, 64, 64, 64,
63
64 64, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
65 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 64, 64, 64, 64, 64
66};
67
// Lookup table mapping a 6-bit value (0..63) to its base64 character.
static const char base64EncodeMap[64] = {
  // values 0..25: upper-case letters
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  // values 26..51: lower-case letters
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  // values 52..61: digits
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  // values 62 and 63
  '+', '/'
};
78//@endcond
79
80class Base64Decoder : public Decoder
81{
82 uint mStepNo;
83 uchar mOutbits;
84 bool mSawPadding : 1;
85
86 protected:
87 friend class Base64Codec;
88 Base64Decoder( bool withCRLF=false )
89 : Decoder( withCRLF ), mStepNo( 0 ), mOutbits( 0 ),
90 mSawPadding( false ) {}
91
92 public:
93 virtual ~Base64Decoder() {}
94
95 bool decode( const char* &scursor, const char * const send,
96 char* &dcursor, const char * const dend );
97 // ### really needs no finishing???
98 bool finish( char* &dcursor, const char * const dend )
99 {
100 Q_UNUSED( dcursor ); Q_UNUSED( dend );
101 return true;
102 }
103};
104
105class Base64Encoder : public Encoder
106{
107 uint mStepNo;
108 /** number of already written base64-quartets on current line */
109 uint mWrittenPacketsOnThisLine;
110 uchar mNextbits;
111 bool mInsideFinishing : 1;
112
113 protected:
114 friend class Rfc2047BEncodingCodec;
115 friend class Rfc2047BEncodingEncoder;
116 friend class Base64Codec;
117 Base64Encoder( bool withCRLF=false )
118 : Encoder( withCRLF ), mStepNo( 0 ), mWrittenPacketsOnThisLine( 0 ),
119 mNextbits( 0 ), mInsideFinishing( false ) {}
120
121 bool generic_finish( char* &dcursor, const char * const dend,
122 bool withLFatEnd );
123
124 public:
125 virtual ~Base64Encoder() {}
126
127 bool encode( const char* &scursor, const char * const send,
128 char* &dcursor, const char * const dend );
129
130 bool finish( char* &dcursor, const char * const dend );
131
132 protected:
133 bool writeBase64( uchar ch, char* &dcursor, const char * const dend )
134 { return write( base64EncodeMap[ ch ], dcursor, dend ); }
135};
136
137class Rfc2047BEncodingEncoder : public Base64Encoder
138{
139 protected:
140 friend class Rfc2047BEncodingCodec;
141 Rfc2047BEncodingEncoder( bool withCRLF=false )
142 : Base64Encoder( withCRLF ) {}
143
144 public:
145 bool encode( const char* &scursor, const char * const send,
146 char* &dcursor, const char * const dend );
147 bool finish( char* &dcursor, const char * const dend );
148};
149
150Encoder *Base64Codec::makeEncoder( bool withCRLF ) const
151{
152 return new Base64Encoder( withCRLF );
153}
154
155Decoder *Base64Codec::makeDecoder( bool withCRLF ) const
156{
157 return new Base64Decoder( withCRLF );
158}
159
160Encoder *Rfc2047BEncodingCodec::makeEncoder( bool withCRLF ) const
161{
162 return new Rfc2047BEncodingEncoder( withCRLF );
163}
164
165/********************************************************/
166/********************************************************/
167/********************************************************/
168
169bool Base64Decoder::decode( const char* &scursor, const char * const send,
170 char* &dcursor, const char * const dend )
171{
172 while ( dcursor != dend && scursor != send ) {
173 uchar ch = *scursor++;
174 uchar value;
175
176 // try converting ch to a 6-bit value:
177 if ( ch < 128 ) {
178 value = base64DecodeMap[ ch ];
179 } else {
180 value = 64;
181 }
182
183 // ch isn't of the base64 alphabet, check for other significant chars:
184 if ( value >= 64 ) {
185 if ( ch == '=' ) {
186 // padding:
187 if ( mStepNo == 0 || mStepNo == 1 ) {
188 if ( !mSawPadding ) {
189 // malformed
190 kWarning() << "Base64Decoder: unexpected padding"
191 "character in input stream";
192 }
193 mSawPadding = true;
194 break;
195 } else if ( mStepNo == 2 ) {
196 // ok, there should be another one
197 } else if ( mStepNo == 3 ) {
198 // ok, end of encoded stream
199 mSawPadding = true;
200 break;
201 }
202 mSawPadding = true;
203 mStepNo = ( mStepNo + 1 ) % 4;
204 continue;
205 } else {
206 // non-base64 alphabet
207 continue;
208 }
209 }
210
211 if ( mSawPadding ) {
212 kWarning() << "Base64Decoder: Embedded padding character"
213 "encountered!";
214 return true;
215 }
216
217 // add the new bits to the output stream and flush full octets:
218 switch ( mStepNo ) {
219 case 0:
220 mOutbits = value << 2;
221 break;
222 case 1:
223 *dcursor++ = (char)( mOutbits | value >> 4 );
224 mOutbits = value << 4;
225 break;
226 case 2:
227 *dcursor++ = (char)( mOutbits | value >> 2 );
228 mOutbits = value << 6;
229 break;
230 case 3:
231 *dcursor++ = (char)( mOutbits | value );
232 mOutbits = 0;
233 break;
234 default:
235 assert( 0 );
236 }
237 mStepNo = ( mStepNo + 1 ) % 4;
238 }
239
240 // return false when caller should call us again:
241 return scursor == send;
242} // Base64Decoder::decode()
243
244bool Base64Encoder::encode( const char* &scursor, const char * const send,
245 char* &dcursor, const char * const dend )
246{
247 const uint maxPacketsPerLine = 76 / 4;
248
249 // detect when the caller doesn't adhere to our rules:
250 if ( mInsideFinishing ) {
251 return true;
252 }
253
254 while ( scursor != send && dcursor != dend ) {
255 // properly empty the output buffer before starting something new:
256 // ### fixme: we can optimize this away, since the buffer isn't
257 // written to anyway (most of the time)
258 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
259 return scursor == send;
260 }
261
262 uchar ch = *scursor++;
263 // mNextbits // (part of) value of next sextet
264
265 // check for line length;
266 if ( mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine ) {
267 writeCRLF( dcursor, dend );
268 mWrittenPacketsOnThisLine = 0;
269 }
270
271 // depending on mStepNo, extract value and mNextbits from the
272 // octet stream:
273 switch ( mStepNo ) {
274 case 0:
275 assert( mNextbits == 0 );
276 writeBase64( ch >> 2, dcursor, dend ); // top-most 6 bits -> output
277 mNextbits = ( ch & 0x3 ) << 4; // 0..1 bits -> 4..5 in mNextbits
278 break;
279 case 1:
280 assert( ( mNextbits & ~0x30 ) == 0 );
281 writeBase64( mNextbits | ch >> 4, dcursor, dend ); // 4..7 bits -> 0..3 in value
282 mNextbits = ( ch & 0xf ) << 2; // 0..3 bits -> 2..5 in mNextbits
283 break;
284 case 2:
285 assert( ( mNextbits & ~0x3C ) == 0 );
286 writeBase64( mNextbits | ch >> 6, dcursor, dend ); // 6..7 bits -> 0..1 in value
287 writeBase64( ch & 0x3F, dcursor, dend ); // 0..5 bits -> output
288 mNextbits = 0;
289 mWrittenPacketsOnThisLine++;
290 break;
291 default:
292 assert( 0 );
293 }
294 mStepNo = ( mStepNo + 1 ) % 3;
295 }
296
297 if ( mOutputBufferCursor ) {
298 flushOutputBuffer( dcursor, dend );
299 }
300
301 return scursor == send;
302}
303
304bool Rfc2047BEncodingEncoder::encode( const char* &scursor,
305 const char * const send,
306 char* &dcursor,
307 const char * const dend )
308{
309 // detect when the caller doesn't adhere to our rules:
310 if ( mInsideFinishing ) {
311 return true;
312 }
313
314 while ( scursor != send && dcursor != dend ) {
315 // properly empty the output buffer before starting something new:
316 // ### fixme: we can optimize this away, since the buffer isn't
317 // written to anyway (most of the time)
318 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
319 return scursor == send;
320 }
321
322 uchar ch = *scursor++;
323 // mNextbits // (part of) value of next sextet
324
325 // depending on mStepNo, extract value and mNextbits from the
326 // octet stream:
327 switch ( mStepNo ) {
328 case 0:
329 assert( mNextbits == 0 );
330 writeBase64( ch >> 2, dcursor, dend ); // top-most 6 bits -> output
331 mNextbits = ( ch & 0x3 ) << 4; // 0..1 bits -> 4..5 in mNextbits
332 break;
333 case 1:
334 assert( ( mNextbits & ~0x30 ) == 0 );
335 writeBase64( mNextbits | ch >> 4, dcursor, dend ); // 4..7 bits -> 0..3 in value
336 mNextbits = ( ch & 0xf ) << 2; // 0..3 bits -> 2..5 in mNextbits
337 break;
338 case 2:
339 assert( ( mNextbits & ~0x3C ) == 0 );
340 writeBase64( mNextbits | ch >> 6, dcursor, dend ); // 6..7 bits -> 0..1 in value
341 writeBase64( ch & 0x3F, dcursor, dend ); // 0..5 bits -> output
342 mNextbits = 0;
343 break;
344 default:
345 assert( 0 );
346 }
347 mStepNo = ( mStepNo + 1 ) % 3;
348 }
349
350 if ( mOutputBufferCursor ) {
351 flushOutputBuffer( dcursor, dend );
352 }
353
354 return scursor == send;
355}
356
357bool Base64Encoder::finish( char* &dcursor, const char * const dend )
358{
359 return generic_finish( dcursor, dend, true );
360}
361
362bool Rfc2047BEncodingEncoder::finish( char* & dcursor,
363 const char * const dend )
364{
365 return generic_finish( dcursor, dend, false );
366}
367
368bool Base64Encoder::generic_finish( char* &dcursor, const char * const dend,
369 bool withLFatEnd )
370{
371 if ( mInsideFinishing ) {
372 return flushOutputBuffer( dcursor, dend );
373 }
374
375 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
376 return false;
377 }
378
379 mInsideFinishing = true;
380
381 //
382 // writing out the last mNextbits...
383 //
384 switch ( mStepNo ) {
385 case 1: // 2 mNextbits waiting to be written. Needs two padding chars:
386 case 2: // 4 or 6 mNextbits waiting to be written. Completes a block
387 writeBase64( mNextbits, dcursor, dend );
388 mNextbits = 0;
389 break;
390 case 0: // no padding, nothing to be written, except possibly the CRLF
391 assert( mNextbits == 0 );
392 break;
393 default:
394 assert( 0 );
395 }
396
397 //
398 // adding padding...
399 //
400 switch ( mStepNo ) {
401 case 1:
402 write( '=', dcursor, dend );
403 // fall through:
404 case 2:
405 write( '=', dcursor, dend );
406 // fall through:
407 case 0: // completed an quartet - add CRLF
408 if ( withLFatEnd ) {
409 writeCRLF( dcursor, dend );
410 }
411 return flushOutputBuffer( dcursor, dend );
412 default:
413 assert( 0 );
414 }
415 return true; // asserts get compiled out
416}
417
418} // namespace KMime
419