1 | /* -*- c++ -*- |
2 | kmime_codec_base64.cpp |
3 | |
4 | KMime, the KDE Internet mail/usenet news message library. |
5 | Copyright (c) 2001 Marc Mutz <mutz@kde.org> |
6 | |
7 | This library is free software; you can redistribute it and/or |
8 | modify it under the terms of the GNU Library General Public |
9 | License as published by the Free Software Foundation; either |
10 | version 2 of the License, or (at your option) any later version. |
11 | |
12 | This library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | Library General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Library General Public License |
18 | along with this library; see the file COPYING.LIB. If not, write to |
19 | the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
20 | Boston, MA 02110-1301, USA. |
21 | */ |
22 | /** |
23 | @file |
24 | This file is part of the API for handling @ref MIME data and |
25 | defines the @ref Base64 and @ref RFC2047B @ref Codec classes. |
26 | |
27 | @brief |
28 | Defines the Base64Codec and Rfc2047BEncodingCodec classes. |
29 | |
30 | @authors Marc Mutz \<mutz@kde.org\> |
31 | */ |
32 | |
33 | #include "kmime_codec_base64.h" |
34 | |
35 | #include <kdebug.h> |
36 | |
37 | #include <cassert> |
38 | |
39 | using namespace KMime; |
40 | |
41 | namespace KMime { |
42 | |
43 | // codec for base64 as specified in RFC 2045 |
44 | //class Base64Codec; |
45 | //class Base64Decoder; |
46 | //class Base64Encoder; |
47 | |
48 | // codec for the B encoding as specified in RFC 2047 |
49 | //class Rfc2047BEncodingCodec; |
50 | //class Rfc2047BEncodingEncoder; |
51 | //class Rfc2047BEncodingDecoder; |
52 | |
53 | //@cond PRIVATE |
// Lookup table indexed by a 7-bit ASCII code. Each entry is the 6-bit value
// of that character in the base64 alphabet, or 64 for every character that
// is not part of the alphabet (this includes the padding character '=').
static const uchar base64DecodeMap[128] = {
  64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64, // 0x00..0x0F: none valid
  64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 64, 64, 64, 64, 64, // 0x10..0x1F: none valid

  64, 64, 64, 64, 64, 64, 64, 64,  64, 64, 64, 62, 64, 64, 64, 63, // 0x20..0x2F: '+' -> 62, '/' -> 63
  52, 53, 54, 55, 56, 57, 58, 59,  60, 61, 64, 64, 64, 64, 64, 64, // 0x30..0x3F: '0'..'9' -> 52..61

  64,  0,  1,  2,  3,  4,  5,  6,   7,  8,  9, 10, 11, 12, 13, 14, // 0x40..0x4F: 'A'..'O' -> 0..14
  15, 16, 17, 18, 19, 20, 21, 22,  23, 24, 25, 64, 64, 64, 64, 64, // 0x50..0x5F: 'P'..'Z' -> 15..25

  64, 26, 27, 28, 29, 30, 31, 32,  33, 34, 35, 36, 37, 38, 39, 40, // 0x60..0x6F: 'a'..'o' -> 26..40
  41, 42, 43, 44, 45, 46, 47, 48,  49, 50, 51, 64, 64, 64, 64, 64  // 0x70..0x7F: 'p'..'z' -> 41..51
};
67 | |
// Lookup table turning a 6-bit value (0..63) into the character that
// represents it in the base64 alphabet.
static const char base64EncodeMap[64] = {
  // values 0..25: upper-case letters
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  // values 26..51: lower-case letters
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  // values 52..61: digits
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  // values 62 and 63
  '+', '/'
};
78 | //@endcond |
79 | |
80 | class Base64Decoder : public Decoder |
81 | { |
82 | uint mStepNo; |
83 | uchar mOutbits; |
84 | bool mSawPadding : 1; |
85 | |
86 | protected: |
87 | friend class Base64Codec; |
88 | Base64Decoder( bool withCRLF=false ) |
89 | : Decoder( withCRLF ), mStepNo( 0 ), mOutbits( 0 ), |
90 | mSawPadding( false ) {} |
91 | |
92 | public: |
93 | virtual ~Base64Decoder() {} |
94 | |
95 | bool decode( const char* &scursor, const char * const send, |
96 | char* &dcursor, const char * const dend ); |
97 | // ### really needs no finishing??? |
98 | bool finish( char* &dcursor, const char * const dend ) |
99 | { |
100 | Q_UNUSED( dcursor ); Q_UNUSED( dend ); |
101 | return true; |
102 | } |
103 | }; |
104 | |
105 | class Base64Encoder : public Encoder |
106 | { |
107 | uint mStepNo; |
108 | /** number of already written base64-quartets on current line */ |
109 | uint mWrittenPacketsOnThisLine; |
110 | uchar mNextbits; |
111 | bool mInsideFinishing : 1; |
112 | |
113 | protected: |
114 | friend class Rfc2047BEncodingCodec; |
115 | friend class Rfc2047BEncodingEncoder; |
116 | friend class Base64Codec; |
117 | Base64Encoder( bool withCRLF=false ) |
118 | : Encoder( withCRLF ), mStepNo( 0 ), mWrittenPacketsOnThisLine( 0 ), |
119 | mNextbits( 0 ), mInsideFinishing( false ) {} |
120 | |
121 | bool generic_finish( char* &dcursor, const char * const dend, |
122 | bool withLFatEnd ); |
123 | |
124 | public: |
125 | virtual ~Base64Encoder() {} |
126 | |
127 | bool encode( const char* &scursor, const char * const send, |
128 | char* &dcursor, const char * const dend ); |
129 | |
130 | bool finish( char* &dcursor, const char * const dend ); |
131 | |
132 | protected: |
133 | bool writeBase64( uchar ch, char* &dcursor, const char * const dend ) |
134 | { return write( base64EncodeMap[ ch ], dcursor, dend ); } |
135 | }; |
136 | |
137 | class Rfc2047BEncodingEncoder : public Base64Encoder |
138 | { |
139 | protected: |
140 | friend class Rfc2047BEncodingCodec; |
141 | Rfc2047BEncodingEncoder( bool withCRLF=false ) |
142 | : Base64Encoder( withCRLF ) {} |
143 | |
144 | public: |
145 | bool encode( const char* &scursor, const char * const send, |
146 | char* &dcursor, const char * const dend ); |
147 | bool finish( char* &dcursor, const char * const dend ); |
148 | }; |
149 | |
150 | Encoder *Base64Codec::makeEncoder( bool withCRLF ) const |
151 | { |
152 | return new Base64Encoder( withCRLF ); |
153 | } |
154 | |
155 | Decoder *Base64Codec::makeDecoder( bool withCRLF ) const |
156 | { |
157 | return new Base64Decoder( withCRLF ); |
158 | } |
159 | |
160 | Encoder *Rfc2047BEncodingCodec::makeEncoder( bool withCRLF ) const |
161 | { |
162 | return new Rfc2047BEncodingEncoder( withCRLF ); |
163 | } |
164 | |
165 | /********************************************************/ |
166 | /********************************************************/ |
167 | /********************************************************/ |
168 | |
169 | bool Base64Decoder::decode( const char* &scursor, const char * const send, |
170 | char* &dcursor, const char * const dend ) |
171 | { |
172 | while ( dcursor != dend && scursor != send ) { |
173 | uchar ch = *scursor++; |
174 | uchar value; |
175 | |
176 | // try converting ch to a 6-bit value: |
177 | if ( ch < 128 ) { |
178 | value = base64DecodeMap[ ch ]; |
179 | } else { |
180 | value = 64; |
181 | } |
182 | |
183 | // ch isn't of the base64 alphabet, check for other significant chars: |
184 | if ( value >= 64 ) { |
185 | if ( ch == '=' ) { |
186 | // padding: |
187 | if ( mStepNo == 0 || mStepNo == 1 ) { |
188 | if ( !mSawPadding ) { |
189 | // malformed |
190 | kWarning() << "Base64Decoder: unexpected padding" |
191 | "character in input stream" ; |
192 | } |
193 | mSawPadding = true; |
194 | break; |
195 | } else if ( mStepNo == 2 ) { |
196 | // ok, there should be another one |
197 | } else if ( mStepNo == 3 ) { |
198 | // ok, end of encoded stream |
199 | mSawPadding = true; |
200 | break; |
201 | } |
202 | mSawPadding = true; |
203 | mStepNo = ( mStepNo + 1 ) % 4; |
204 | continue; |
205 | } else { |
206 | // non-base64 alphabet |
207 | continue; |
208 | } |
209 | } |
210 | |
211 | if ( mSawPadding ) { |
212 | kWarning() << "Base64Decoder: Embedded padding character" |
213 | "encountered!" ; |
214 | return true; |
215 | } |
216 | |
217 | // add the new bits to the output stream and flush full octets: |
218 | switch ( mStepNo ) { |
219 | case 0: |
220 | mOutbits = value << 2; |
221 | break; |
222 | case 1: |
223 | *dcursor++ = (char)( mOutbits | value >> 4 ); |
224 | mOutbits = value << 4; |
225 | break; |
226 | case 2: |
227 | *dcursor++ = (char)( mOutbits | value >> 2 ); |
228 | mOutbits = value << 6; |
229 | break; |
230 | case 3: |
231 | *dcursor++ = (char)( mOutbits | value ); |
232 | mOutbits = 0; |
233 | break; |
234 | default: |
235 | assert( 0 ); |
236 | } |
237 | mStepNo = ( mStepNo + 1 ) % 4; |
238 | } |
239 | |
240 | // return false when caller should call us again: |
241 | return scursor == send; |
242 | } // Base64Decoder::decode() |
243 | |
244 | bool Base64Encoder::encode( const char* &scursor, const char * const send, |
245 | char* &dcursor, const char * const dend ) |
246 | { |
247 | const uint maxPacketsPerLine = 76 / 4; |
248 | |
249 | // detect when the caller doesn't adhere to our rules: |
250 | if ( mInsideFinishing ) { |
251 | return true; |
252 | } |
253 | |
254 | while ( scursor != send && dcursor != dend ) { |
255 | // properly empty the output buffer before starting something new: |
256 | // ### fixme: we can optimize this away, since the buffer isn't |
257 | // written to anyway (most of the time) |
258 | if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) { |
259 | return scursor == send; |
260 | } |
261 | |
262 | uchar ch = *scursor++; |
263 | // mNextbits // (part of) value of next sextet |
264 | |
265 | // check for line length; |
266 | if ( mStepNo == 0 && mWrittenPacketsOnThisLine >= maxPacketsPerLine ) { |
267 | writeCRLF( dcursor, dend ); |
268 | mWrittenPacketsOnThisLine = 0; |
269 | } |
270 | |
271 | // depending on mStepNo, extract value and mNextbits from the |
272 | // octet stream: |
273 | switch ( mStepNo ) { |
274 | case 0: |
275 | assert( mNextbits == 0 ); |
276 | writeBase64( ch >> 2, dcursor, dend ); // top-most 6 bits -> output |
277 | mNextbits = ( ch & 0x3 ) << 4; // 0..1 bits -> 4..5 in mNextbits |
278 | break; |
279 | case 1: |
280 | assert( ( mNextbits & ~0x30 ) == 0 ); |
281 | writeBase64( mNextbits | ch >> 4, dcursor, dend ); // 4..7 bits -> 0..3 in value |
282 | mNextbits = ( ch & 0xf ) << 2; // 0..3 bits -> 2..5 in mNextbits |
283 | break; |
284 | case 2: |
285 | assert( ( mNextbits & ~0x3C ) == 0 ); |
286 | writeBase64( mNextbits | ch >> 6, dcursor, dend ); // 6..7 bits -> 0..1 in value |
287 | writeBase64( ch & 0x3F, dcursor, dend ); // 0..5 bits -> output |
288 | mNextbits = 0; |
289 | mWrittenPacketsOnThisLine++; |
290 | break; |
291 | default: |
292 | assert( 0 ); |
293 | } |
294 | mStepNo = ( mStepNo + 1 ) % 3; |
295 | } |
296 | |
297 | if ( mOutputBufferCursor ) { |
298 | flushOutputBuffer( dcursor, dend ); |
299 | } |
300 | |
301 | return scursor == send; |
302 | } |
303 | |
304 | bool Rfc2047BEncodingEncoder::encode( const char* &scursor, |
305 | const char * const send, |
306 | char* &dcursor, |
307 | const char * const dend ) |
308 | { |
309 | // detect when the caller doesn't adhere to our rules: |
310 | if ( mInsideFinishing ) { |
311 | return true; |
312 | } |
313 | |
314 | while ( scursor != send && dcursor != dend ) { |
315 | // properly empty the output buffer before starting something new: |
316 | // ### fixme: we can optimize this away, since the buffer isn't |
317 | // written to anyway (most of the time) |
318 | if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) { |
319 | return scursor == send; |
320 | } |
321 | |
322 | uchar ch = *scursor++; |
323 | // mNextbits // (part of) value of next sextet |
324 | |
325 | // depending on mStepNo, extract value and mNextbits from the |
326 | // octet stream: |
327 | switch ( mStepNo ) { |
328 | case 0: |
329 | assert( mNextbits == 0 ); |
330 | writeBase64( ch >> 2, dcursor, dend ); // top-most 6 bits -> output |
331 | mNextbits = ( ch & 0x3 ) << 4; // 0..1 bits -> 4..5 in mNextbits |
332 | break; |
333 | case 1: |
334 | assert( ( mNextbits & ~0x30 ) == 0 ); |
335 | writeBase64( mNextbits | ch >> 4, dcursor, dend ); // 4..7 bits -> 0..3 in value |
336 | mNextbits = ( ch & 0xf ) << 2; // 0..3 bits -> 2..5 in mNextbits |
337 | break; |
338 | case 2: |
339 | assert( ( mNextbits & ~0x3C ) == 0 ); |
340 | writeBase64( mNextbits | ch >> 6, dcursor, dend ); // 6..7 bits -> 0..1 in value |
341 | writeBase64( ch & 0x3F, dcursor, dend ); // 0..5 bits -> output |
342 | mNextbits = 0; |
343 | break; |
344 | default: |
345 | assert( 0 ); |
346 | } |
347 | mStepNo = ( mStepNo + 1 ) % 3; |
348 | } |
349 | |
350 | if ( mOutputBufferCursor ) { |
351 | flushOutputBuffer( dcursor, dend ); |
352 | } |
353 | |
354 | return scursor == send; |
355 | } |
356 | |
357 | bool Base64Encoder::finish( char* &dcursor, const char * const dend ) |
358 | { |
359 | return generic_finish( dcursor, dend, true ); |
360 | } |
361 | |
362 | bool Rfc2047BEncodingEncoder::finish( char* & dcursor, |
363 | const char * const dend ) |
364 | { |
365 | return generic_finish( dcursor, dend, false ); |
366 | } |
367 | |
368 | bool Base64Encoder::generic_finish( char* &dcursor, const char * const dend, |
369 | bool withLFatEnd ) |
370 | { |
371 | if ( mInsideFinishing ) { |
372 | return flushOutputBuffer( dcursor, dend ); |
373 | } |
374 | |
375 | if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) { |
376 | return false; |
377 | } |
378 | |
379 | mInsideFinishing = true; |
380 | |
381 | // |
382 | // writing out the last mNextbits... |
383 | // |
384 | switch ( mStepNo ) { |
385 | case 1: // 2 mNextbits waiting to be written. Needs two padding chars: |
386 | case 2: // 4 or 6 mNextbits waiting to be written. Completes a block |
387 | writeBase64( mNextbits, dcursor, dend ); |
388 | mNextbits = 0; |
389 | break; |
390 | case 0: // no padding, nothing to be written, except possibly the CRLF |
391 | assert( mNextbits == 0 ); |
392 | break; |
393 | default: |
394 | assert( 0 ); |
395 | } |
396 | |
397 | // |
398 | // adding padding... |
399 | // |
400 | switch ( mStepNo ) { |
401 | case 1: |
402 | write( '=', dcursor, dend ); |
403 | // fall through: |
404 | case 2: |
405 | write( '=', dcursor, dend ); |
406 | // fall through: |
407 | case 0: // completed an quartet - add CRLF |
408 | if ( withLFatEnd ) { |
409 | writeCRLF( dcursor, dend ); |
410 | } |
411 | return flushOutputBuffer( dcursor, dend ); |
412 | default: |
413 | assert( 0 ); |
414 | } |
415 | return true; // asserts get compiled out |
416 | } |
417 | |
418 | } // namespace KMime |
419 | |