trac.mimeview.api

1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (C) 2004-2020 Edgewall Software 4 # Copyright (C) 2004 Daniel Lundin <[email protected]> 5 # Copyright (C) 2005-2006 Christopher Lenz <[email protected]> 6 # Copyright (C) 2006-2007 Christian Boos <[email protected]> 7 # All rights reserved. 8 # 9 # This software is licensed as described in the file COPYING, which 10 # you should have received as part of this distribution. The terms 11 # are also available at https://trac.edgewall.org/wiki/TracLicense. 12 # 13 # This software consists of voluntary contributions made by many 14 # individuals. For the exact contribution history, see the revision 15 # history and logs, available at https://trac.edgewall.org/log/. 16 # 17 # Author: Daniel Lundin <[email protected]> 18 # Christopher Lenz <[email protected]> 19 # Christian Boos <[email protected]> 20 21 """ 22 File metadata management 23 ------------------------ 24 25 The `trac.mimeview` package centralizes the intelligence related to 26 file metadata, principally concerning the `type` (MIME type) of the 27 content and, if relevant, concerning the text encoding (charset) used 28 by the content. 29 30 There are primarily two approaches for getting the MIME type of a 31 given file, either taking advantage of existing conventions for the 32 file name, or examining the file content and applying various 33 heuristics. 34 35 The module also knows how to convert the file content from one type to 36 another type. 37 38 In some cases, only the `url` pointing to the file's content is 39 actually needed, that's why we avoid to read the file's content when 40 it's not needed. 41 42 The actual `content` to be converted might be a `unicode` object, but 43 it can also be the raw byte string (`str`) object, or simply an object 44 that can be `read()`. 45 46 .. note:: (for plugin developers) 47 48 The Mimeview API is quite complex and many things there are 49 currently a bit difficult to work with (e.g. what an actual 50 `content` might be, see the last paragraph of this description). 51 52 So this area is mainly in a ''work in progress'' state, which will 53 be improved along the lines described in :teo:`#3332`. 54 55 In particular, if you are interested in writing `IContentConverter` 56 and `IHTMLPreviewRenderer` components, note that those interfaces 57 will be merged into a new style `IContentConverter`. Feel free to 58 contribute remarks and suggestions for improvements to the 59 corresponding ticket (#3332 as well). 60 """ 61 62 import re 63 from StringIO import StringIO 64 65 from genshi import Markup, Stream 66 from genshi.core import TEXT, START, END, START_NS, END_NS 67 from genshi.builder import Fragment, tag 68 from genshi.input import HTMLParser 69 70 from trac.config import IntOption, ListOption, Option 71 from trac.core import Component, ExtensionPoint, Interface, TracError, \ 72 implements 73 from trac.resource import Resource 74 from trac.util import Ranges, content_disposition 75 from trac.util.text import exception_to_unicode, to_utf8, to_unicode 76 from trac.util.translation import _, tag_ 77 78 79 __all__ = ['Context', 'Mimeview', 'RenderingContext', 'get_mimetype', 80 'is_binary', 'detect_unicode', 'content_to_unicode', 'ct_mimetype']

81 82 83 -class RenderingContext(object):

84 """A rendering context specifies ''how'' the content should be rendered. 85 86 It holds together all the needed contextual information that will be 87 needed by individual renderer components. 88 89 To that end, a context keeps track of the Href instance 90 (``.href``) which should be used as a base for building URLs. 91 92 It also provides a `PermissionCache` (``.perm``) which can be used 93 to restrict the output so that only the authorized information is 94 shown. 95 96 A rendering context may also be associated to some Trac resource which 97 will be used as the implicit reference when rendering relative links 98 or for retrieving relative content and can be used to retrieve related 99 metadata. 100 101 Rendering contexts can be nested, and a new context can be created from 102 an existing context using the call syntax. The previous context can be 103 retrieved using the ``.parent`` attribute. 104 105 For example, when rendering a wiki text of a wiki page, the context will 106 be associated to a resource identifying that wiki page. 107 108 If that wiki text contains a `[[TicketQuery]]` wiki macro, the macro will 109 set up nested contexts for each matching ticket that will be used for 110 rendering the ticket descriptions. 111 112 :since: version 1.0 113 114 """ 115

116 - def __init__(self, resource, href=None, perm=None):

117 """Directly create a `RenderingContext`. 118 119 :param resource: the associated resource 120 :type resource: `Resource` 121 :param href: an `Href` object suitable for creating URLs 122 :param perm: a `PermissionCache` object used for restricting the 123 generated output to "authorized" information only. 124 125 The actual `.perm` attribute of the rendering context will be bound 126 to the given `resource` so that fine-grained permission checks will 127 apply to that. 128 """ 129 self.parent = None #: The parent context, if any 130 self.resource = resource 131 self.href = href 132 self.perm = perm(resource) if perm and resource else perm 133 self._hints = None

134 135 @staticmethod

136 - def from_request(*args, **kwargs):

137 """ 138 :deprecated: since 1.0, use `web_context` instead. Will be removed 139 in release 1.3.1. 140 """ 141 from trac.web.chrome import web_context 142 return web_context(*args, **kwargs)

143

144 - def __repr__(self):

145 path = [] 146 context = self 147 while context: 148 if context.resource.realm: # skip toplevel resource 149 path.append(repr(context.resource)) 150 context = context.parent 151 return '<%s %s>' % (type(self).__name__, ' - '.join(reversed(path)))

152

153 - def child(self, resource=None, id=False, version=False, parent=False):

154 """Create a nested rendering context. 155 156 `self` will be the parent for the new nested context. 157 158 :param resource: either a `Resource` object or the realm string for a 159 resource specification to be associated to the new 160 context. If `None`, the resource will be the same 161 as the resource of the parent context. 162 :param id: the identifier part of the resource specification 163 :param version: the version of the resource specification 164 :return: the new context object 165 :rtype: `RenderingContext` 166 167 >>> context = RenderingContext('wiki', 'WikiStart') 168 >>> ticket1 = Resource('ticket', 1) 169 >>> context.child('ticket', 1).resource == ticket1 170 True 171 >>> context.child(ticket1).resource is ticket1 172 True 173 >>> context.child(ticket1)().resource is ticket1 174 True 175 """ 176 if resource: 177 resource = Resource(resource, id=id, version=version, 178 parent=parent) 179 else: 180 resource = self.resource 181 context = RenderingContext(resource, href=self.href, perm=self.perm) 182 context.parent = self 183 184 # hack for context instances created by from_request() 185 # this is needed because various parts of the code rely on a request 186 # object being available, but that will hopefully improve in the 187 # future 188 if hasattr(self, 'req'): 189 context.req = self.req 190 191 return context

192 193 __call__ = child 194

195 - def __contains__(self, resource):

196 """Check whether a resource is in the rendering path. 197 198 The primary use for this check is to avoid to render the content of a 199 resource if we're already embedded in a context associated to that 200 resource. 201 202 :param resource: a `Resource` specification which will be checked for 203 """ 204 context = self 205 while context: 206 if context.resource and \ 207 context.resource.realm == resource.realm and \ 208 context.resource.id == resource.id: 209 # we don't care about version here 210 return True 211 context = context.parent

212 213 # Rendering hints 214 # 215 # A rendering hint is a key/value pairs that can influence renderers, 216 # wiki formatters and processors in the way they produce their output. 217 # The keys are strings, but the values could be anything. 218 # 219 # In nested contexts, the hints are inherited from their parent context, 220 # unless overridden locally. 221

222 - def set_hints(self, **keyvalues):

223 """Set rendering hints for this rendering context. 224 225 >>> ctx = RenderingContext('timeline') 226 >>> ctx.set_hints(wiki_flavor='oneliner', shorten_lines=True) 227 >>> t_ctx = ctx('ticket', 1) 228 >>> t_ctx.set_hints(wiki_flavor='html', preserve_newlines=True) 229 >>> (t_ctx.get_hint('wiki_flavor'), t_ctx.get_hint('shorten_lines'), \ 230 t_ctx.get_hint('preserve_newlines')) 231 ('html', True, True) 232 >>> (ctx.get_hint('wiki_flavor'), ctx.get_hint('shorten_lines'), \ 233 ctx.get_hint('preserve_newlines')) 234 ('oneliner', True, None) 235 """ 236 if self._hints is None: 237 self._hints = {} 238 hints = self._parent_hints() 239 if hints is not None: 240 self._hints.update(hints) 241 self._hints.update(keyvalues)

242

243 - def get_hint(self, hint, default=None):

244 """Retrieve a rendering hint from this context or an ancestor context. 245 246 >>> ctx = RenderingContext('timeline') 247 >>> ctx.set_hints(wiki_flavor='oneliner') 248 >>> t_ctx = ctx('ticket', 1) 249 >>> t_ctx.get_hint('wiki_flavor') 250 'oneliner' 251 >>> t_ctx.get_hint('preserve_newlines', True) 252 True 253 """ 254 hints = self._hints 255 if hints is None: 256 hints = self._parent_hints() 257 if hints is None: 258 return default 259 return hints.get(hint, default)

260

261 - def has_hint(self, hint):

262 """Test whether a rendering hint is defined in this context or in some 263 ancestor context. 264 265 >>> ctx = RenderingContext('timeline') 266 >>> ctx.set_hints(wiki_flavor='oneliner') 267 >>> t_ctx = ctx('ticket', 1) 268 >>> t_ctx.has_hint('wiki_flavor') 269 True 270 >>> t_ctx.has_hint('preserve_newlines') 271 False 272 """ 273 hints = self._hints 274 if hints is None: 275 hints = self._parent_hints() 276 if hints is None: 277 return False 278 return hint in hints

279

280 - def _parent_hints(self):

281 p = self.parent 282 while p and p._hints is None: 283 p = p.parent 284 return p and p._hints

285

286 287 -class Context(RenderingContext):

288 """ 289 :deprecated: since 1.0, use `RenderingContext` instead. `Context` is 290 kept for compatibility and will be removed release 1.3.1. 291 """

292 293 294 # Some common MIME types and their associated keywords and/or file extensions 295 296 KNOWN_MIME_TYPES = { 297 'application/javascript': 'js', 298 'application/msword': 'doc dot', 299 'application/pdf': 'pdf', 300 'application/postscript': 'ps', 301 'application/rtf': 'rtf', 302 'application/x-dos-batch': 'bat batch cmd dos', 303 'application/x-sh': 'sh', 304 'application/x-csh': 'csh', 305 'application/x-genshi': 'genshi', 306 'application/x-troff': 'nroff roff troff', 307 'application/x-yaml': 'yml yaml', 308 309 'application/rss+xml': 'rss', 310 'application/xsl+xml': 'xsl', 311 'application/xslt+xml': 'xslt', 312 313 'image/x-icon': 'ico', 314 'image/svg+xml': 'svg', 315 316 'model/vrml': 'vrml wrl', 317 318 'text/css': 'css', 319 'text/html': 'html htm', 320 'text/plain': 'txt TXT text README INSTALL ' 321 'AUTHORS COPYING ChangeLog RELEASE', 322 'text/xml': 'xml', 323 324 # see also TEXT_X_TYPES below 325 'text/x-apacheconf': 'apache', 326 'text/x-csrc': 'c xs', 327 'text/x-chdr': 'h', 328 'text/x-c++src': 'cc CC cpp C c++ C++', 329 'text/x-c++hdr': 'hh HH hpp H', 330 'text/x-csharp': 'cs c# C#', 331 'text/x-diff': 'patch', 332 'text/x-eiffel': 'e', 333 'text/x-elisp': 'el', 334 'text/x-fortran': 'f', 335 'text/x-haskell': 'hs', 336 'text/x-ini': 'ini cfg', 337 'text/x-nginx-conf': 'nginx', 338 'text/x-objc': 'm mm', 339 'text/x-ocaml': 'ml mli', 340 'text/x-makefile': 'make mk Makefile GNUMakefile', 341 'text/x-pascal': 'pas', 342 'text/x-perl': 'pl pm PL', 343 'text/x-php': 'php3 php4', 344 'text/x-python': 'py', 345 'text/x-python-doctest': 'pycon', 346 'text/x-pyrex': 'pyx', 347 'text/x-ruby': 'rb', 348 'text/x-scheme': 'scm', 349 'text/x-textile': 'txtl', 350 'text/x-vba': 'vb vba bas', 351 'text/x-verilog': 'v', 352 'text/x-vhdl': 'vhd', 353 } 354 for t in KNOWN_MIME_TYPES.keys(): 355 types = KNOWN_MIME_TYPES[t].split() 356 if t.startswith('text/x-'): 357 types.append(t[len('text/x-'):]) 358 KNOWN_MIME_TYPES[t] = types 359 360 # extend the above with simple (text/x-<something>: <something>) mappings 361 362 TEXT_X_TYPES = """ 363 ada asm asp awk idl inf java ksh lua m4 mail psp rfc rst sql tcl tex zsh 364 """ 365 for x in TEXT_X_TYPES.split(): 366 KNOWN_MIME_TYPES.setdefault('text/x-%s' % x, []).append(x) 367 368 369 # Default mapping from keywords/extensions to known MIME types: 370 371 MIME_MAP = {} 372 for t, exts in KNOWN_MIME_TYPES.items(): 373 MIME_MAP[t] = t 374 for e in exts: 375 MIME_MAP[e] = t 376 377 # Simple builtin autodetection from the content using a regexp 378 MODE_RE = re.compile(r""" 379 \#!.+?env\s+(\w+) # 1. look for shebang with env 380 | \#!(?:[/\w.-_]+/)?(\w+) # 2. look for regular shebang 381 | -\*-\s*(?:mode:\s*)?([\w+-]+)\s*-\*- # 3. look for Emacs' -*- mode -*- 382 | vim:.*?(?:syntax|filetype|ft)=(\w+) # 4. look for VIM's syntax=<n> 383 """, re.VERBOSE)

384 385 386 -def get_mimetype(filename, content=None, mime_map=MIME_MAP, 387 mime_map_patterns={}):

388 """Guess the most probable MIME type of a file with the given name. 389 390 `filename` is either a filename (the lookup will then use the suffix) 391 or some arbitrary keyword. 392 393 `content` is either a `str` or an `unicode` string. 394 """ 395 # 0) mimetype from filename pattern (most specific) 396 for mimetype, regexp in mime_map_patterns.iteritems(): 397 if regexp.match(filename): 398 return mimetype 399 suffix = filename.split('.')[-1] 400 if suffix in mime_map: 401 # 1) mimetype from the suffix, using the `mime_map` 402 return mime_map[suffix] 403 else: 404 mimetype = None 405 try: 406 import mimetypes 407 # 2) mimetype from the suffix, using the `mimetypes` module 408 mimetype = mimetypes.guess_type(filename)[0] 409 except Exception: 410 pass 411 if not mimetype and content: 412 match = re.search(MODE_RE, content[:1000] + content[-1000:]) 413 if match: 414 mode = match.group(1) or match.group(2) or match.group(4) or \ 415 match.group(3).lower() 416 if mode in mime_map: 417 # 3) mimetype from the content, using the `MODE_RE` 418 return mime_map[mode] 419 else: 420 if is_binary(content): 421 # 4) mimetype from the content, using`is_binary` 422 return 'application/octet-stream' 423 return mimetype

424

425 426 -def ct_mimetype(content_type):

427 """Return the mimetype part of a content type.""" 428 return (content_type or '').split(';')[0].strip()

429

430 431 -def is_binary(data):

432 """Detect binary content by checking the first thousand bytes for zeroes. 433 434 Operate on either `str` or `unicode` strings. 435 """ 436 if isinstance(data, str) and detect_unicode(data): 437 return False 438 return '\0' in data[:1000]

439

440 441 -def detect_unicode(data):

442 """Detect different unicode charsets by looking for BOMs (Byte Order Mark). 443 444 Operate obviously only on `str` objects. 445 """ 446 if data.startswith('\xff\xfe'): 447 return 'utf-16-le' 448 elif data.startswith('\xfe\xff'): 449 return 'utf-16-be' 450 elif data.startswith('\xef\xbb\xbf'): 451 return 'utf-8' 452 else: 453 return None

454

455 456 -def content_to_unicode(env, content, mimetype):

457 """Retrieve an `unicode` object from a `content` to be previewed. 458 459 In case the raw content had an unicode BOM, we remove it. 460 461 >>> from trac.test import EnvironmentStub 462 >>> env = EnvironmentStub() 463 >>> content_to_unicode(env, u"\ufeffNo BOM! h\u00e9 !", '') 464 u'No BOM! h\\xe9 !' 465 >>> content_to_unicode(env, "\xef\xbb\xbfNo BOM! h\xc3\xa9 !", '') 466 u'No BOM! h\\xe9 !' 467 468 """ 469 mimeview = Mimeview(env) 470 if hasattr(content, 'read'): 471 content = content.read(mimeview.max_preview_size) 472 u = mimeview.to_unicode(content, mimetype) 473 if u and u[0] == u'\ufeff': 474 u = u[1:] 475 return u

476

477 478 -class IHTMLPreviewRenderer(Interface):

479 """Extension point interface for components that add HTML renderers of 480 specific content types to the `Mimeview` component. 481 482 .. note:: 483 484 This interface will be merged with IContentConverter, as 485 conversion to text/html will simply be a particular content 486 conversion. 487 488 Note however that the IHTMLPreviewRenderer will still be 489 supported for a while through an adapter, whereas the 490 IContentConverter interface itself will be changed. 491 492 So if all you want to do is convert to HTML and don't feel like 493 following the API changes, you should rather implement this 494 interface for the time being. 495 """ 496 497 #: implementing classes should set this property to True if they 498 #: support text content where Trac should expand tabs into spaces 499 expand_tabs = False 500 501 #: indicate whether the output of this renderer is source code that can 502 #: be decorated with annotations 503 returns_source = False 504

505 - def get_extra_mimetypes():

506 """Augment the Mimeview system with new mimetypes associations. 507 508 This is an optional method. Not implementing the method or 509 returning nothing is fine, the component will still be asked 510 via `get_quality_ratio` if it supports a known mimetype. But 511 implementing it can be useful when the component knows about 512 additional mimetypes which may augment the list of already 513 mimetype to keywords associations. 514 515 Generate ``(mimetype, keywords)`` pairs for each additional 516 mimetype, with ``keywords`` being a list of keywords or 517 extensions that can be used as aliases for the mimetype 518 (typically file suffixes or Wiki processor keys). 519 520 .. versionadded:: 1.0 521 """

522

523 - def get_quality_ratio(mimetype):

524 """Return the level of support this renderer provides for the `content` 525 of the specified MIME type. The return value must be a number between 526 0 and 9, where 0 means no support and 9 means "perfect" support. 527 """

528

529 - def render(context, mimetype, content, filename=None, url=None):

530 """Render an XHTML preview of the raw ``content`` in a 531 `RenderingContext`. 532 533 The `content` might be: 534 * a `str` object 535 * an `unicode` string 536 * any object with a `read` method, returning one of the above 537 538 It is assumed that the content will correspond to the given `mimetype`. 539 540 Besides the `content` value, the same content may eventually 541 be available through the `filename` or `url` parameters. 542 This is useful for renderers that embed objects, using <object> or 543 <img> instead of including the content inline. 544 545 Can return the generated XHTML text as a single string or as an 546 iterable that yields strings. In the latter case, the list will 547 be considered to correspond to lines of text in the original content. 548 549 """

550

551 552 -class IHTMLPreviewAnnotator(Interface):

553 """Extension point interface for components that can annotate an XHTML 554 representation of file contents with additional information.""" 555

556 - def get_annotation_type():

557 """Return a (type, label, description) tuple 558 that defines the type of annotation and provides human readable names. 559 The `type` element should be unique to the annotator. 560 The `label` element is used as column heading for the table, 561 while `description` is used as a display name to let the user 562 toggle the appearance of the annotation type. 563 """

564

565 - def get_annotation_data(context):

566 """Return some metadata to be used by the `annotate_row` method below. 567 568 This will be called only once, before lines are processed. 569 If this raises an error, that annotator won't be used. 570 """

571

572 - def annotate_row(context, row, number, line, data):

573 """Return the XHTML markup for the table cell that contains the 574 annotation data. 575 576 `context` is the context corresponding to the content being annotated, 577 `row` is the tr Element being built, `number` is the line number being 578 processed and `line` is the line's actual content. 579 `data` is whatever additional data the `get_annotation_data` method 580 decided to provide. 581 """

582

583 584 -class IContentConverter(Interface):

585 """An extension point interface for generic MIME based content 586 conversion. 587 588 .. note:: This api will likely change in the future (see :teo:`#3332`) 589 590 """ 591

592 - def get_supported_conversions():

593 """Return an iterable of tuples in the form (key, name, extension, 594 in_mimetype, out_mimetype, quality) representing the MIME conversions 595 supported and 596 the quality ratio of the conversion in the range 0 to 9, where 0 means 597 no support and 9 means "perfect" support. eg. ('latex', 'LaTeX', 'tex', 598 'text/x-trac-wiki', 'text/plain', 8)"""

599

600 - def convert_content(req, mimetype, content, key):

601 """Convert the given content from mimetype to the output MIME type 602 represented by key. Returns a tuple in the form (content, 603 output_mime_type) or None if conversion is not possible. 604 605 content must be a `str` instance or an iterable instance which 606 iterates `str` instances."""

607

608 609 -class Content(object):

610 """A lazy file-like object that only reads `input` if necessary."""

611 - def __init__(self, input, max_size):

612 self.input = input 613 self.max_size = max_size 614 self.content = None

615

616 - def read(self, size=-1):

617 if size == 0: 618 return '' 619 if self.content is None: 620 self.content = StringIO(self.input.read(self.max_size)) 621 return self.content.read(size)

622

623 - def reset(self):

624 if self.content is not None: 625 self.content.seek(0)

626

627 628 -class Mimeview(Component):

629 """Generic HTML renderer for data, typically source code.""" 630 631 required = True 632 633 renderers = ExtensionPoint(IHTMLPreviewRenderer) 634 annotators = ExtensionPoint(IHTMLPreviewAnnotator) 635 converters = ExtensionPoint(IContentConverter) 636 637 default_charset = Option('trac', 'default_charset', 'utf-8', 638 """Charset to be used when in doubt.""") 639 640 tab_width = IntOption('mimeviewer', 'tab_width', 8, 641 """Displayed tab width in file preview. (''since 0.9'')""") 642 643 max_preview_size = IntOption('mimeviewer', 'max_preview_size', 262144, 644 """Maximum file size for HTML preview. (''since 0.9'')""") 645 646 mime_map = ListOption('mimeviewer', 'mime_map', 647 'text/x-dylan:dylan, text/x-idl:ice, text/x-ada:ads:adb', 648 doc="""List of additional MIME types and keyword mappings. 649 Mappings are comma-separated, and for each MIME type, 650 there's a colon (":") separated list of associated keywords 651 or file extensions. (''since 0.10'') 652 """) 653 654 mime_map_patterns = ListOption('mimeviewer', 'mime_map_patterns', 655 'text/plain:README|INSTALL|COPYING.*', 656 doc="""List of additional MIME types associated to filename patterns. 657 Mappings are comma-separated, and each mapping consists of a MIME type 658 and a Python regexp used for matching filenames, separated by a colon 659 (":"). (''since 1.0'') 660 """) 661 662 treat_as_binary = ListOption('mimeviewer', 'treat_as_binary', 663 'application/octet-stream, application/pdf, application/postscript, ' 664 'application/msword, application/rtf', 665 doc="""Comma-separated list of MIME types that should be treated as 666 binary data. (''since 0.11.5'')""") 667

668 - def __init__(self):

669 self._mime_map = None 670 self._mime_map_patterns = None

671 672 # Public API 673

674 - def get_supported_conversions(self, mimetype):

675 """Return a list of target MIME types in same form as 676 `IContentConverter.get_supported_conversions()`, but with the converter 677 component appended. Output is ordered from best to worst quality.""" 678 converters = [] 679 for converter in self.converters: 680 conversions = converter.get_supported_conversions() or [] 681 for k, n, e, im, om, q in conversions: 682 if im == mimetype and q > 0: 683 converters.append((k, n, e, im, om, q, converter)) 684 converters = sorted(converters, key=lambda i: i[-2], reverse=True) 685 return converters

686

687 - def convert_content(self, req, mimetype, content, key, filename=None, 688 url=None, iterable=False):

689 """Convert the given content to the target MIME type represented by 690 `key`, which can be either a MIME type or a key. Returns a tuple of 691 (content, output_mime_type, extension).""" 692 if not content: 693 return '', 'text/plain;charset=utf-8', '.txt' 694 695 # Ensure we have a MIME type for this content 696 full_mimetype = mimetype 697 if not full_mimetype: 698 if hasattr(content, 'read'): 699 content = content.read(self.max_preview_size) 700 full_mimetype = self.get_mimetype(filename, content) 701 if full_mimetype: 702 mimetype = ct_mimetype(full_mimetype) # split off charset 703 else: 704 mimetype = full_mimetype = 'text/plain' # fallback if not binary 705 706 # Choose best converter 707 candidates = list(self.get_supported_conversions(mimetype) or []) 708 candidates = [c for c in candidates if key in (c[0], c[4])] 709 if not candidates: 710 raise TracError( 711 _("No available MIME conversions from %(old)s to %(new)s", 712 old=mimetype, new=key)) 713 714 # First successful conversion wins 715 for ck, name, ext, input_mimettype, output_mimetype, quality, \ 716 converter in candidates: 717 output = converter.convert_content(req, mimetype, content, ck) 718 if output: 719 content, content_type = output 720 if iterable: 721 if isinstance(content, basestring): 722 content = (content,) 723 else: 724 if not isinstance(content, basestring): 725 content = ''.join(content) 726 return content, content_type, ext 727 raise TracError( 728 _("No available MIME conversions from %(old)s to %(new)s", 729 old=mimetype, new=key))

730

731 - def get_annotation_types(self):

732 """Generator that returns all available annotation types.""" 733 for annotator in self.annotators: 734 yield annotator.get_annotation_type()

735

736 - def render(self, context, mimetype, content, filename=None, url=None, 737 annotations=None, force_source=False):

738 """Render an XHTML preview of the given `content`. 739 740 `content` is the same as an `IHTMLPreviewRenderer.render`'s 741 `content` argument. 742 743 The specified `mimetype` will be used to select the most appropriate 744 `IHTMLPreviewRenderer` implementation available for this MIME type. 745 If not given, the MIME type will be infered from the filename or the 746 content. 747 748 Return a string containing the XHTML text. 749 750 When rendering with an `IHTMLPreviewRenderer` fails, a warning is added 751 to the request associated with the context (if any), unless the 752 `disable_warnings` hint is set to `True`. 753 """ 754 if not content: 755 return '' 756 if not isinstance(context, RenderingContext): 757 raise TypeError("RenderingContext expected (since 0.11)") 758 759 # Ensure we have a MIME type for this content 760 full_mimetype = mimetype 761 if not full_mimetype: 762 if hasattr(content, 'read'): 763 content = content.read(self.max_preview_size) 764 full_mimetype = self.get_mimetype(filename, content) 765 if full_mimetype: 766 mimetype = ct_mimetype(full_mimetype) # split off charset 767 else: 768 mimetype = full_mimetype = 'text/plain' # fallback if not binary 769 770 # Determine candidate `IHTMLPreviewRenderer`s 771 candidates = [] 772 for renderer in self.renderers: 773 qr = renderer.get_quality_ratio(mimetype) 774 if qr > 0: 775 candidates.append((qr, renderer)) 776 candidates.sort(lambda x, y: cmp(y[0], x[0])) 777 778 # Wrap file-like object so that it can be read multiple times 779 if hasattr(content, 'read'): 780 content = Content(content, self.max_preview_size) 781 782 # First candidate which renders successfully wins. 783 # Also, we don't want to expand tabs more than once. 784 expanded_content = None 785 for qr, renderer in candidates: 786 if force_source and not getattr(renderer, 'returns_source', False): 787 continue # skip non-source renderers in force_source mode 788 if isinstance(content, Content): 789 content.reset() 790 try: 791 ann_names = ', '.join(annotations) if annotations else \ 792 'no annotations' 793 self.log.debug('Trying to render HTML preview using %s [%s]', 794 renderer.__class__.__name__, ann_names) 795 796 # check if we need to perform a tab expansion 797 rendered_content = content 798 if getattr(renderer, 'expand_tabs', False): 799 if expanded_content is None: 800 content = content_to_unicode(self.env, content, 801 full_mimetype) 802 expanded_content = content.expandtabs(self.tab_width) 803 rendered_content = expanded_content 804 805 result = renderer.render(context, full_mimetype, 806 rendered_content, filename, url) 807 if not result: 808 continue 809 810 if not (force_source or getattr(renderer, 'returns_source', 811 False)): 812 # Direct rendering of content 813 if isinstance(result, basestring): 814 if not isinstance(result, unicode): 815 result = to_unicode(result) 816 return Markup(to_unicode(result)) 817 elif isinstance(result, Fragment): 818 return result.generate() 819 else: 820 return result 821 822 # Render content as source code 823 if annotations: 824 m = context.req.args.get('marks') if context.req else None 825 return self._render_source(context, result, annotations, 826 m and Ranges(m)) 827 else: 828 if isinstance(result, list): 829 result = Markup('\n').join(result) 830 return tag.div(class_='code')(tag.pre(result)).generate() 831 832 except Exception, e: 833 self.log.warning('HTML preview using %s with %r failed: %s', 834 renderer.__class__.__name__, context, 835 exception_to_unicode(e, traceback=True)) 836 if context.req and not context.get_hint('disable_warnings'): 837 from trac.web.chrome import add_warning 838 add_warning(context.req, 839 _("HTML preview using %(renderer)s failed (%(err)s)", 840 renderer=renderer.__class__.__name__, 841 err=exception_to_unicode(e)))

842

843 - def _render_source(self, context, stream, annotations, marks=None):

844 from trac.web.chrome import add_warning 845 annotators, labels, titles = {}, {}, {} 846 for annotator in self.annotators: 847 atype, alabel, atitle = annotator.get_annotation_type() 848 if atype in annotations: 849 labels[atype] = alabel 850 titles[atype] = atitle 851 annotators[atype] = annotator 852 annotations = [a for a in annotations if a in annotators] 853 854 if isinstance(stream, list): 855 stream = HTMLParser(StringIO(u'\n'.join(stream))) 856 elif isinstance(stream, unicode): 857 text = stream 858 def linesplitter(): 859 for line in text.splitlines(True): 860 yield TEXT, line, (None, -1, -1)

861 stream = linesplitter() 862 863 annotator_datas = [] 864 for a in annotations: 865 annotator = annotators[a] 866 try: 867 data = (annotator, annotator.get_annotation_data(context)) 868 except TracError, e: 869 self.log.warning("Can't use annotator '%s': %s", a, e) 870 add_warning(context.req, tag.strong( 871 tag_("Can't use %(annotator)s annotator: %(error)s", 872 annotator=tag.em(a), error=tag.pre(e)))) 873 data = None, None 874 annotator_datas.append(data) 875 876 def _head_row(): 877 return tag.tr( 878 [tag.th(labels[a], class_=a, title=titles[a]) 879 for a in annotations] + 880 [tag.th(u'\xa0', class_='content')] 881 )

882 883 def _body_rows(): 884 for idx, line in enumerate(_group_lines(stream)): 885 row = tag.tr() 886 if marks and idx + 1 in marks: 887 row(class_='hilite') 888 for annotator, data in annotator_datas: 889 if annotator: 890 annotator.annotate_row(context, row, idx+1, line, data) 891 else: 892 row.append(tag.td()) 893 row.append(tag.td(line)) 894 yield row 895 896 return tag.table(class_='code')( 897 tag.thead(_head_row()), 898 tag.tbody(_body_rows()) 899 ) 900

901 - def get_max_preview_size(self):

902 """:deprecated: since 0.10, use `max_preview_size` attribute directly. 903 """ 904 return self.max_preview_size

905

906 - def get_charset(self, content='', mimetype=None):

907 """Infer the character encoding from the `content` or the `mimetype`. 908 909 `content` is either a `str` or an `unicode` object. 910 911 The charset will be determined using this order: 912 * from the charset information present in the `mimetype` argument 913 * auto-detection of the charset from the `content` 914 * the configured `default_charset` 915 """ 916 if mimetype: 917 ctpos = mimetype.find('charset=') 918 if ctpos >= 0: 919 return mimetype[ctpos + 8:].strip() 920 if isinstance(content, str): 921 utf = detect_unicode(content) 922 if utf is not None: 923 return utf 924 return self.default_charset

925 926 @property

927 - def mime_map(self):

928 # Extend default extension to MIME type mappings with configured ones 929 if not self._mime_map: 930 self._mime_map = MIME_MAP.copy() 931 # augment mime_map from `IHTMLPreviewRenderer`s 932 for renderer in self.renderers: 933 if hasattr(renderer, 'get_extra_mimetypes'): 934 for mimetype, kwds in renderer.get_extra_mimetypes() or []: 935 self._mime_map[mimetype] = mimetype 936 for keyword in kwds: 937 self._mime_map[keyword] = mimetype 938 # augment/override mime_map from trac.ini 939 for mapping in self.config['mimeviewer'].getlist('mime_map'): 940 if ':' in mapping: 941 assocations = mapping.split(':') 942 for keyword in assocations: # Note: [0] kept on purpose 943 self._mime_map[keyword] = assocations[0] 944 return self._mime_map

945

946 - def get_mimetype(self, filename, content=None):

947 """Infer the MIME type from the `filename` or the `content`. 948 949 `content` is either a `str` or an `unicode` object. 950 951 Return the detected MIME type, augmented by the 952 charset information (i.e. "<mimetype>; charset=..."), 953 or `None` if detection failed. 954 """ 955 956 mimetype = get_mimetype(filename, content, self.mime_map, 957 self.mime_map_patterns) 958 charset = None 959 if mimetype: 960 charset = self.get_charset(content, mimetype) 961 if mimetype and charset and not 'charset' in mimetype: 962 mimetype += '; charset=' + charset 963 return mimetype

964 965 @property

966 - def mime_map_patterns(self):

967 if not self._mime_map_patterns: 968 self._mime_map_patterns = {} 969 for mapping in self.config['mimeviewer'] \ 970 .getlist('mime_map_patterns'): 971 if ':' in mapping: 972 mimetype, regexp = mapping.split(':', 1) 973 try: 974 self._mime_map_patterns[mimetype] = re.compile(regexp) 975 except re.error, e: 976 self.log.warning("mime_map_patterns contains invalid " 977 "regexp '%s' for mimetype '%s' (%s)", 978 regexp, mimetype, exception_to_unicode(e)) 979 return self._mime_map_patterns

980

981 - def is_binary(self, mimetype=None, filename=None, content=None):

982 """Check if a file must be considered as binary.""" 983 if not mimetype and filename: 984 mimetype = self.get_mimetype(filename, content) 985 if mimetype: 986 mimetype = ct_mimetype(mimetype) 987 if mimetype in self.treat_as_binary: 988 return True 989 if content is not None and is_binary(content): 990 return True 991 return False

992

993 - def to_utf8(self, content, mimetype=None):

994 """Convert an encoded `content` to utf-8. 995 996 :deprecated: since 0.10, you should use `unicode` strings only. 997 """ 998 return to_utf8(content, self.get_charset(content, mimetype))

999

1000 - def to_unicode(self, content, mimetype=None, charset=None):

1001 """Convert `content` (an encoded `str` object) to an `unicode` object. 1002 1003 This calls `trac.util.to_unicode` with the `charset` provided, 1004 or the one obtained by `Mimeview.get_charset()`. 1005 """ 1006 if not charset: 1007 charset = self.get_charset(content, mimetype) 1008 return to_unicode(content, charset)

1009

1010 - def configured_modes_mapping(self, renderer):

1011 """Return a MIME type to `(mode,quality)` mapping for given `option`""" 1012 types, option = {}, '%s_modes' % renderer 1013 for mapping in self.config['mimeviewer'].getlist(option): 1014 if not mapping: 1015 continue 1016 try: 1017 mimetype, mode, quality = mapping.split(':') 1018 types[mimetype] = (mode, int(quality)) 1019 except (TypeError, ValueError): 1020 self.log.warning("Invalid mapping '%s' specified in '%s' " 1021 "option.", mapping, option) 1022 return types

1023

1024 - def preview_data(self, context, content, length, mimetype, filename, 1025 url=None, annotations=None, force_source=False):

1026 """Prepares a rendered preview of the given `content`. 1027 1028 Note: `content` will usually be an object with a `read` method. 1029 """ 1030 data = {'raw_href': url, 'size': length, 1031 'max_file_size': self.max_preview_size, 1032 'max_file_size_reached': False, 1033 'rendered': None, 1034 } 1035 if length >= self.max_preview_size: 1036 data['max_file_size_reached'] = True 1037 else: 1038 result = self.render(context, mimetype, content, filename, url, 1039 annotations, force_source=force_source) 1040 data['rendered'] = result 1041 return data

1042

1043 - def send_converted(self, req, in_type, content, selector, filename='file'):

1044 """Helper method for converting `content` and sending it directly. 1045 1046 `selector` can be either a key or a MIME Type.""" 1047 from trac.web.chrome import Chrome 1048 from trac.web.api import RequestDone 1049 iterable = Chrome(self.env).use_chunked_encoding 1050 content, output_type, ext = self.convert_content(req, in_type, content, 1051 selector, 1052 iterable=iterable) 1053 if iterable: 1054 def encoder(content): 1055 for chunk in content: 1056 if isinstance(chunk, unicode): 1057 chunk = chunk.encode('utf-8') 1058 yield chunk

1059 content = encoder(content) 1060 length = None 1061 else: 1062 if isinstance(content, unicode): 1063 content = content.encode('utf-8') 1064 length = len(content) 1065 req.send_response(200) 1066 req.send_header('Content-Type', output_type) 1067 if length is not None: 1068 req.send_header('Content-Length', length) 1069 if filename: 1070 req.send_header('Content-Disposition', 1071 content_disposition('attachment', 1072 '%s.%s' % (filename, ext))) 1073 req.end_headers() 1074 req.write(content) 1075 raise RequestDone 1076

1077 1078 -def _group_lines(stream):

1079 space_re = re.compile('(?P<spaces> (?: +))|^(?P<tag><\w+.*?>)?( )') 1080 1081 def pad_spaces(match): 1082 m = match.group('spaces') 1083 if m: 1084 div, mod = divmod(len(m), 2) 1085 return div * u'\xa0 ' + mod * u'\xa0' 1086 return (match.group('tag') or '') + u'\xa0'

1087 1088 def _generate(): 1089 stack = [] 1090 def _reverse(): 1091 for event in reversed(stack): 1092 if event[0] is START: 1093 yield END, event[1][0], event[2] 1094 else: 1095 yield END_NS, event[1][0], event[2] 1096 1097 for kind, data, pos in stream: 1098 if kind is TEXT: 1099 lines = data.split('\n') 1100 if lines: 1101 # First element 1102 for e in stack: 1103 yield e 1104 yield kind, lines.pop(0), pos 1105 for e in _reverse(): 1106 yield e 1107 # Subsequent ones, prefix with \n 1108 for line in lines: 1109 yield TEXT, '\n', pos 1110 for e in stack: 1111 yield e 1112 yield kind, line, pos 1113 for e in _reverse(): 1114 yield e 1115 else: 1116 if kind is START or kind is START_NS: 1117 stack.append((kind, data, pos)) 1118 elif kind is END or kind is END_NS: 1119 stack.pop() 1120 else: 1121 yield kind, data, pos 1122 1123 buf = [] 1124 1125 # Fix the \n at EOF. 1126 if not isinstance(stream, list): 1127 stream = list(stream) 1128 found_text = False 1129 1130 for i in range(len(stream)-1, -1, -1): 1131 if stream[i][0] is TEXT: 1132 e = stream[i] 1133 # One chance to strip a \n 1134 if not found_text and e[1].endswith('\n'): 1135 stream[i] = (e[0], e[1][:-1], e[2]) 1136 if len(e[1]): 1137 found_text = True 1138 break 1139 if not found_text: 1140 raise StopIteration 1141 1142 for kind, data, pos in _generate(): 1143 if kind is TEXT and data == '\n': 1144 yield Stream(buf[:]) 1145 del buf[:] 1146 else: 1147 if kind is TEXT: 1148 data = space_re.sub(pad_spaces, data) 1149 buf.append((kind, data, pos)) 1150 if buf: 1151 yield Stream(buf[:]) 1152

1153 1154 # -- Default annotators 1155 1156 -class LineNumberAnnotator(Component):

1157 """Text annotator that adds a column with line numbers.""" 1158 implements(IHTMLPreviewAnnotator) 1159 1160 # IHTMLPreviewAnnotator methods 1161

1162 - def get_annotation_type(self):

1163 return 'lineno', _('Line'), _('Line numbers')

1164

1165 - def get_annotation_data(self, context):

1166 return None

1167

1168 - def annotate_row(self, context, row, lineno, line, data):

1169 row.append(tag.th(id='L%s' % lineno)( 1170 tag.a(lineno, href='#L%s' % lineno) 1171 ))

1172

1173 1174 # -- Default renderers 1175 1176 -class PlainTextRenderer(Component):

1177 """HTML preview renderer for plain text, and fallback for any kind of text 1178 for which no more specific renderer is available. 1179 """ 1180 implements(IHTMLPreviewRenderer) 1181 1182 expand_tabs = True 1183 returns_source = True 1184

1185 - def get_quality_ratio(self, mimetype):

1186 if mimetype in Mimeview(self.env).treat_as_binary: 1187 return 0 1188 return 1

1189

1190 - def render(self, context, mimetype, content, filename=None, url=None):

1191 if is_binary(content): 1192 self.log.debug("Binary data; no preview available") 1193 return 1194 1195 self.log.debug("Using default plain text mimeviewer") 1196 return content_to_unicode(self.env, content, mimetype)

1197

1198 1199 -class ImageRenderer(Component):

1200 """Inline image display. 1201 1202 This component doesn't need the `content` at all. 1203 """ 1204 implements(IHTMLPreviewRenderer) 1205

1206 - def get_quality_ratio(self, mimetype):

1207 if mimetype.startswith('image/'): 1208 return 8 1209 return 0

1210

1211 - def render(self, context, mimetype, content, filename=None, url=None):

1212 if url: 1213 return tag.div(tag.img(src=url, alt=filename), 1214 class_='image-file')

1215

1216 1217 -class WikiTextRenderer(Component):

1218 """HTML renderer for files containing Trac's own Wiki formatting markup.""" 1219 implements(IHTMLPreviewRenderer) 1220

1221 - def get_quality_ratio(self, mimetype):

1222 if mimetype in ('text/x-trac-wiki', 'application/x-trac-wiki'): 1223 return 8 1224 return 0

1225

1226 - def render(self, context, mimetype, content, filename=None, url=None):

1227 from trac.wiki.formatter import format_to_html 1228 return format_to_html(self.env, context, 1229 content_to_unicode(self.env, content, mimetype))

1230

Source Code for Module trac.mimeview.api