source: trunk/essentials/dev-lang/python/Lib/email/utils.py@ 3393

Last change on this file since 3393 was 3225, checked in by bird, 19 years ago

Python 2.5

File size: 9.7 KB
Line 
1# Copyright (C) 2001-2006 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: [email protected]
4
5"""Miscellaneous utilities."""
6
# Public names exported by a star-import of this module.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
21
22import os
23import re
24import time
25import base64
26import random
27import socket
28import urllib
29import warnings
30from cStringIO import StringIO
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
36# We need wormarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
# Separator getaddresses() uses to join several field values into one string.
COMMASPACE = ', '
# Joiner decode_params() uses to concatenate RFC 2231 continuation segments.
EMPTYSTRING = ''
UEMPTYSTRING = u''
# The line ending that fix_eols() normalizes to.
CRLF = '\r\n'
# Delimiter decode_rfc2231() splits on (charset'language'text).
TICK = "'"

# Characters whose presence makes formataddr() wrap the real name in quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() backslash-escapes inside the real name.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56
57# Helpers
58
59def _identity(s):
60 return s
61
62
63def _bdecode(s):
64 # We can't quite use base64.encodestring() since it tacks on a "courtesy
65 # newline". Blech!
66 if not s:
67 return s
68 value = base64.decodestring(s)
69 if not s.endswith('\n') and value.endswith('\n'):
70 return value[:-1]
71 return value
72
73
74
75
def fix_eols(s):
    """Return s with every line ending normalized to CRLF."""
    # First pass: a bare LF (no CR directly before it) becomes CRLF.
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a bare CR (no LF directly after it) becomes CRLF.
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
83
84
85
86
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(); the result is suitable for an
    RFC 2822 From, To or Cc header.

    When realname is false, email_address is returned unmodified.
    Otherwise every character matched by escapesre is backslash-escaped in
    the name, the name is wrapped in double quotes if it contains any
    character matched by specialsre, and the address follows in angle
    brackets.
    """
    name, address = pair
    if not name:
        return address
    # The quoting decision is made on the name as given, before escaping.
    if specialsre.search(name):
        quotes = '"'
    else:
        quotes = ''
    escaped = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
103
104
105
106
def getaddresses(fieldvalues):
    """Return the (REALNAME, EMAIL) pairs parsed from all the fieldvalues.

    The field values are joined with ', ' and parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
112
113
114
115
# Matches one encoded word of the shape =?charset?encoding?atom?= (the
# RFC 2047 encoded-word layout), exposing the named groups 'charset',
# 'encoding' (q or b, matched case-insensitively) and 'atom'.  Nothing else
# in this module references the pattern.
ecre = re.compile(r'''
 =\? # literal =?
 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
 \? # literal ?
 (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
 \? # literal ?
 (?P<atom>.*?) # non-greedy up to the next ?= is the atom
 \?= # literal ?=
 ''', re.VERBOSE | re.IGNORECASE)
125
126
127
128
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return an RFC 2822 date string, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval is a floating point time value as accepted by time.gmtime()
    and time.localtime(); when it is None the current time is used.

    localtime, when true, renders the date in the local timezone with a
    numeric offset that accounts for daylight savings time; otherwise the
    date is rendered in UTC.

    usegmt, when true, writes the UTC zone as the string "GMT" rather than
    the numeric "-0000" (HTTP wants this form).  It only has an effect when
    localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        stamp = time.localtime(timeval)
        # Use the DST offset only when the local zone defines DST and the
        # DST flag (last field of the time tuple) is set for this moment.
        if time.daylight and stamp[-1]:
            seconds_west = time.altzone
        else:
            seconds_west = time.timezone
        hours, leftover = divmod(abs(seconds_west), 3600)
        # The offset counts seconds *west* of UTC while the zone field is
        # expressed *east* of UTC, hence the inverted sign.
        if seconds_west > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[stamp[6]],
        stamp[2],
        month_names[stamp[1] - 1],
        stamp[0], stamp[3], stamp[4], stamp[5],
        zone)
179
180
181
182
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g.:

    <20011109010847.1234.56789@host.example.com>

    The local part is built from the current UTC timestamp, the process id
    and a random integer below 100000; the domain part is the value of
    socket.getfqdn().  If idstring is given it is appended to the local
    part after a dot to strengthen the uniqueness of the message id.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    randint = random.randrange(100000)
    if idstring is not None:
        suffix = '.' + idstring
    else:
        suffix = ''
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, suffix,
                                socket.getfqdn())
202
203
204
205
206# These functions are in the standalone mimelib version only because they've
207# subsequently been fixed in the latest Python versions. We use this to worm
208# around broken older Pythons.
def parsedate(data):
    """Return _parsedate(data), or None when data is false (empty or None)."""
    if data:
        return _parsedate(data)
    return None
213
214
def parsedate_tz(data):
    """Return _parsedate_tz(data), or None when data is false (empty or None)."""
    if data:
        return _parsedate_tz(data)
    return None
219
220
def parseaddr(addr):
    """Return the first (realname, email_address) pair parsed from addr.

    When no address can be parsed out of addr, the pair ('', '') is
    returned instead.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
226
227
228# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Strip one layer of surrounding quotes or angle brackets from str.

    A string wrapped in double quotes loses them, and the backslash escapes
    for backslash and double quote inside it are undone.  A string wrapped
    in <...> only loses the brackets.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    if len(str) < 2:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        # Undo escaped backslashes first, then escaped double quotes.
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
237
238
239
240
241# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Split an RFC 2231 extended value into charset, language and text.

    s is expected to look like charset'language'text.  It is split on the
    first two single quotes only, so any further single quotes remain part
    of the text.

    Returns a 3-item sequence (charset, language, text).  When s contains
    fewer than two single quotes it cannot be split, and the tuple
    (None, None, s) is returned; otherwise the 3-item list produced by the
    split is returned.
    """
    # maxsplit=2 caps the result at three parts, so no re-joining of
    # surplus parts is ever needed.
    parts = s.split(TICK, 2)
    if len(parts) <= 2:
        return None, None, s
    return parts
252
253
254def encode_rfc2231(s, charset=None, language=None):
255 """Encode string according to RFC 2231.
256
257 If neither charset nor language is given, then s is returned as-is. If
258 charset is given but not language, the string is encoded using the empty
259 string for language.
260 """
261 import urllib
262 s = urllib.quote(s, safe='')
263 if charset is None and language is None:
264 return s
265 if language is None:
266 language = ''
267 return "%s'%s'%s" % (charset, language, s)
268
269
# Matches a parameter name that carries a trailing '*', optionally followed
# by a section number and one more '*': name*, name*0 or name*0*.  The groups
# 'name' and 'num' give the bare name and the section number (None if absent).
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
271
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    Returns a new list of 2-tuples.  The first pair is carried over as
    given.  Every other ordinary parameter has its value unquoted and then
    re-quoted inside double quotes.  Parameters whose names match the
    RFC 2231 continuation pattern are grouped by bare name and their
    segments concatenated in sorted order; if any segment was %-encoded
    (its name ended in '*'), the value becomes a
    (charset, language, '"text"') 3-tuple, otherwise a '"text"' string.

    Raises IndexError if params is empty.
    """
    # The first pair is passed through without any decoding.
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Iterate over a slice rather than popping from the front of a copy:
    # each pop(0) shifts the whole list, making the loop quadratic.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # Append all values in numerical order, converting %-encodings for
        # the encoded segments.  If any of the continuation names ends in
        # a *, then the entire string, after decoding segments and
        # concatenating, must have the charset and language specifiers at
        # the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
324
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    A value that is not a tuple is simply unquoted and returned.

    A tuple is treated as (charset, language, text): the text is unquoted
    and decoded to unicode using the charset and the given errors policy.
    A false charset is replaced by the literal 'us-ascii' (independently of
    fallback_charset).  If Python does not know the charset, the text is
    decoded with fallback_charset instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # The charset name is not one Python has a codec for.
        return unicode(rawval, fallback_charset, errors)
Note: See TracBrowser for help on using the repository browser.