Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

generator.py@ 3225

Visit:

Last change on this file since 3225 was 3225, checked in by bird, 19 years ago
Python 2.5
File size: 12.8 KB

Line
1	# Copyright (C) 2001-2006 Python Software Foundation
2	# Author: Barry Warsaw
3	# Contact: email-sig@python.org
4
5	"""Classes to generate plain text from a message object tree."""
6
7	__all__ = ['Generator', 'DecodedGenerator']
8
9	import re
10	import sys
11	import time
12	import random
13	import warnings
14
15	from cStringIO import StringIO
16	from email.header import Header
17
18	UNDERSCORE = '_'
19	NL = '\n'
20
21	fcre = re.compile(r'^From ', re.MULTILINE)
22
23	def _is8bitstring(s):
24	if isinstance(s, str):
25	try:
26	unicode(s, 'us-ascii')
27	except UnicodeError:
28	return True
29	return False
30
31
32
33
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers, then body) to the current output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in identifiers, so they map to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg followed by the blank separator line."""
        for h, v in msg.items():
            # The trailing comma suppresses the newline so that the value
            # printed below lands on the same line as the header name.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, escaping From_ lines when so configured.

        A payload of None writes nothing.  Raises TypeError if the payload is
        neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write all subparts of msg separated by its MIME boundary."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if boundary is None:
            # The message has no boundary yet, so craft one that doesn't
            # appear in any of the message texts and record it.  This is only
            # done when needed: crafting a boundary scans all of the part
            # text, and while set_boundary() preserves order, it doesn't
            # preserve newlines/continuations in headers.  That is no big deal
            # in practice, but turns out to be inconvenient for the unittest
            # suite.
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            print >> self._fp, msg.preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status part."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the message embedded in a message/* part."""
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        #
        # The payload can instead be something that is not a list, e.g. a
        # plain string when only the headers of the enclosing message were
        # parsed.  Such a payload cannot be indexed with get_payload(0), so it
        # is written through as-is.
        payload = msg.get_payload()
        if isinstance(payload, list):
            s = StringIO()
            g = self.clone(s)
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)
269
270
271
272
273	_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
274
class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, except that every non-text part
    is replaced by a one-line description built from a format string.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        # Use the caller's template when one is given, otherwise the
        # module-level default.
        if fmt is not None:
            self._fmt = fmt
        else:
            self._fmt = _FMT

    def _dispatch(self, msg):
        """Print each text part decoded and a placeholder for other parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Container parts produce no output of their own.
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
                continue
            # Anything else is summarized through the format string.
            substitutions = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % substitutions
328
329
330
331
332	# Helper
333	_width = len(repr(sys.maxint-1))
334	_fmt = '%%0%dd' % _width
335
def _make_boundary(text=None):
    """Craft a random boundary string.

    When text is given, the result is additionally guaranteed not to occur
    in text as a delimiter line, i.e. a line consisting of `--' followed by
    the boundary and an optional trailing `--'.
    """
    token = random.randrange(sys.maxint)
    boundary = '%s%s==' % ('=' * 15, _fmt % token)
    if text is None:
        return boundary
    candidate = boundary
    counter = 0
    # Keep appending a numeric suffix until no line of the text could be
    # mistaken for a delimiter built from the candidate.
    while re.search('^--' + re.escape(candidate) + '(--)?$', text,
                    re.MULTILINE):
        candidate = boundary + '.' + str(counter)
        counter += 1
    return candidate

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/python/2.5/Lib/email/generator.py@ 3225

Download in other formats: