source: trunk/essentials/dev-lang/python/Modules/binascii.c@ 3403

Last change on this file since 3403 was 3225, checked in by bird, 19 years ago

Python 2.5

File size: 41.6 KB
Line 
1/*
2** Routines to represent binary data in ASCII and vice-versa
3**
4** This module currently supports the following encodings:
5** uuencode:
6** each line encodes 45 bytes (except possibly the last)
7** First char encodes (binary) length, rest data
8** each char encodes 6 bits, as follows:
9** binary: 01234567 abcdefgh ijklmnop
10** ascii: 012345 67abcd efghij klmnop
11** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12** short binary data is zero-extended (so the bits are always in the
13** right place), this does *not* reflect in the length.
14** base64:
15** Line breaks are insignificant, but lines are at most 76 chars
16** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17** is done via a table.
18** Short binary data is filled (in ASCII) with '='.
19** hqx:
20** File starts with introductory text, real data starts and ends
21** with colons.
22** Data consists of three similar parts: info, datafork, resourcefork.
23** Each part is protected (at the end) with a 16-bit crc
24** The binary data is run-length encoded, and then ascii-fied:
25** binary: 01234567 abcdefgh ijklmnop
26** ascii: 012345 67abcd efghij klmnop
27** ASCII encoding is table-driven, see the code.
28** Short binary data results in the runt ascii-byte being output with
29** the bits in the right place.
30**
31** While I was reading dozens of programs that encode or decode the formats
32** here (documentation? hihi:-) I have formulated Jansen's Observation:
33**
34** Programs that encode binary data in ASCII are written in
35** such a style that they are as unreadable as possible. Devices used
36** include unnecessary global variables, burying important tables
37** in unrelated sourcefiles, putting functions in include files,
38** using seemingly-descriptive variable names for different purposes,
39** calls to empty subroutines and a host of others.
40**
41** I have attempted to break with this tradition, but I guess that that
42** does make the performance sub-optimal. Oh well, too bad...
43**
44** Jack Jansen, CWI, July 1995.
45**
46** Added support for quoted-printable encoding, based on rfc 1521 et al
47** quoted-printable encoding specifies that non printable characters (anything
48** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49** of the character. It also specifies some other behavior to enable 8bit data
50** in a mail message with little difficulty (maximum line sizes, protecting
51** some cases of whitespace, etc).
52**
53** Brandon Long, September 2001.
54*/
55
56#define PY_SSIZE_T_CLEAN
57
58#include "Python.h"
59
60static PyObject *Error;
61static PyObject *Incomplete;
62
63/*
64** hqx lookup table, ascii->binary.
65*/
66
67#define RUNCHAR 0x90
68
69#define DONE 0x7F
70#define SKIP 0x7E
71#define FAIL 0x7D
72
73static unsigned char table_a2b_hqx[256] = {
74/* ^@ ^A ^B ^C ^D ^E ^F ^G */
75/* 0*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
76/* \b \t \n ^K ^L \r ^N ^O */
77/* 1*/ FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
78/* ^P ^Q ^R ^S ^T ^U ^V ^W */
79/* 2*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
80/* ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ */
81/* 3*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
82/* ! " # $ % & ' */
83/* 4*/ FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
84/* ( ) * + , - . / */
85/* 5*/ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
86/* 0 1 2 3 4 5 6 7 */
87/* 6*/ 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
88/* 8 9 : ; < = > ? */
89/* 7*/ 0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
90/* @ A B C D E F G */
91/* 8*/ 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
92/* H I J K L M N O */
93/* 9*/ 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
94/* P Q R S T U V W */
95/*10*/ 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
96/* X Y Z [ \ ] ^ _ */
97/*11*/ 0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
98/* ` a b c d e f g */
99/*12*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
100/* h i j k l m n o */
101/*13*/ 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
102/* p q r s t u v w */
103/*14*/ 0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
104/* x y z { | } ~ ^? */
105/*15*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
106/*16*/ FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
107 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
108 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
109 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
110 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
111 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
112 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
113 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
114 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
115 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
116 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
117 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
118 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
119 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
120 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
121 FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
122};
123
124static unsigned char table_b2a_hqx[] =
125"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
126
127static char table_a2b_base64[] = {
128 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
129 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
130 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
131 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
132 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
133 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
134 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
135 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
136};
137
138#define BASE64_PAD '='
139
140/* Max binary chunk size; limited only by available memory */
141#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)
142
143static unsigned char table_b2a_base64[] =
144"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
145
146
147
148static unsigned short crctab_hqx[256] = {
149 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
150 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
151 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
152 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
153 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
154 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
155 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
156 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
157 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
158 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
159 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
160 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
161 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
162 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
163 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
164 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
165 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
166 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
167 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
168 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
169 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
170 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
171 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
172 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
173 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
174 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
175 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
176 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
177 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
178 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
179 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
180 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
181};
182
183PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
184
185static PyObject *
186binascii_a2b_uu(PyObject *self, PyObject *args)
187{
188 unsigned char *ascii_data, *bin_data;
189 int leftbits = 0;
190 unsigned char this_ch;
191 unsigned int leftchar = 0;
192 PyObject *rv;
193 Py_ssize_t ascii_len, bin_len;
194
195 if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
196 return NULL;
197
198 /* First byte: binary data length (in bytes) */
199 bin_len = (*ascii_data++ - ' ') & 077;
200 ascii_len--;
201
202 /* Allocate the buffer */
203 if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
204 return NULL;
205 bin_data = (unsigned char *)PyString_AsString(rv);
206
207 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
208 /* XXX is it really best to add NULs if there's no more data */
209 this_ch = (ascii_len > 0) ? *ascii_data : 0;
210 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
211 /*
212 ** Whitespace. Assume some spaces got eaten at
213 ** end-of-line. (We check this later)
214 */
215 this_ch = 0;
216 } else {
217 /* Check the character for legality
218 ** The 64 in stead of the expected 63 is because
219 ** there are a few uuencodes out there that use
220 ** '`' as zero instead of space.
221 */
222 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
223 PyErr_SetString(Error, "Illegal char");
224 Py_DECREF(rv);
225 return NULL;
226 }
227 this_ch = (this_ch - ' ') & 077;
228 }
229 /*
230 ** Shift it in on the low end, and see if there's
231 ** a byte ready for output.
232 */
233 leftchar = (leftchar << 6) | (this_ch);
234 leftbits += 6;
235 if ( leftbits >= 8 ) {
236 leftbits -= 8;
237 *bin_data++ = (leftchar >> leftbits) & 0xff;
238 leftchar &= ((1 << leftbits) - 1);
239 bin_len--;
240 }
241 }
242 /*
243 ** Finally, check that if there's anything left on the line
244 ** that it's whitespace only.
245 */
246 while( ascii_len-- > 0 ) {
247 this_ch = *ascii_data++;
248 /* Extra '`' may be written as padding in some cases */
249 if ( this_ch != ' ' && this_ch != ' '+64 &&
250 this_ch != '\n' && this_ch != '\r' ) {
251 PyErr_SetString(Error, "Trailing garbage");
252 Py_DECREF(rv);
253 return NULL;
254 }
255 }
256 return rv;
257}
258
259PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
260
261static PyObject *
262binascii_b2a_uu(PyObject *self, PyObject *args)
263{
264 unsigned char *ascii_data, *bin_data;
265 int leftbits = 0;
266 unsigned char this_ch;
267 unsigned int leftchar = 0;
268 PyObject *rv;
269 Py_ssize_t bin_len;
270
271 if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
272 return NULL;
273 if ( bin_len > 45 ) {
274 /* The 45 is a limit that appears in all uuencode's */
275 PyErr_SetString(Error, "At most 45 bytes at once");
276 return NULL;
277 }
278
279 /* We're lazy and allocate to much (fixed up later) */
280 if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
281 return NULL;
282 ascii_data = (unsigned char *)PyString_AsString(rv);
283
284 /* Store the length */
285 *ascii_data++ = ' ' + (bin_len & 077);
286
287 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
288 /* Shift the data (or padding) into our buffer */
289 if ( bin_len > 0 ) /* Data */
290 leftchar = (leftchar << 8) | *bin_data;
291 else /* Padding */
292 leftchar <<= 8;
293 leftbits += 8;
294
295 /* See if there are 6-bit groups ready */
296 while ( leftbits >= 6 ) {
297 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
298 leftbits -= 6;
299 *ascii_data++ = this_ch + ' ';
300 }
301 }
302 *ascii_data++ = '\n'; /* Append a courtesy newline */
303
304 _PyString_Resize(&rv, (ascii_data -
305 (unsigned char *)PyString_AsString(rv)));
306 return rv;
307}
308
309
310static int
311binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
312{
313 /* Finds & returns the (num+1)th
314 ** valid character for base64, or -1 if none.
315 */
316
317 int ret = -1;
318 unsigned char c, b64val;
319
320 while ((slen > 0) && (ret == -1)) {
321 c = *s;
322 b64val = table_a2b_base64[c & 0x7f];
323 if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
324 if (num == 0)
325 ret = *s;
326 num--;
327 }
328
329 s++;
330 slen--;
331 }
332 return ret;
333}
334
335PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
336
337static PyObject *
338binascii_a2b_base64(PyObject *self, PyObject *args)
339{
340 unsigned char *ascii_data, *bin_data;
341 int leftbits = 0;
342 unsigned char this_ch;
343 unsigned int leftchar = 0;