trac.mimeview.api

1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (C) 2004-2009 Edgewall Software 4 # Copyright (C) 2004 Daniel Lundin <[email protected]> 5 # Copyright (C) 2005-2006 Christopher Lenz <[email protected]> 6 # Copyright (C) 2006-2007 Christian Boos <[email protected]> 7 # All rights reserved. 8 # 9 # This software is licensed as described in the file COPYING, which 10 # you should have received as part of this distribution. The terms 11 # are also available at http://trac.edgewall.org/wiki/TracLicense. 12 # 13 # This software consists of voluntary contributions made by many 14 # individuals. For the exact contribution history, see the revision 15 # history and logs, available at http://trac.edgewall.org/log/. 16 # 17 # Author: Daniel Lundin <[email protected]> 18 # Christopher Lenz <[email protected]> 19 # Christian Boos <[email protected]> 20 21 """File metadata management. 22 23 The `trac.mimeview` package centralizes the intelligence related to 24 file metadata, principally concerning the `type` (MIME type) of the content 25 and, if relevant, concerning the text encoding (charset) used by the content. 26 27 There are primarily two approaches for getting the MIME type of a given file: 28 * taking advantage of existing conventions for the file name 29 * examining the file content and applying various heuristics 30 31 The module also knows how to convert the file content from one type 32 to another type. 33 34 In some cases, only the `url` pointing to the file's content is actually 35 needed, that's why we avoid to read the file's content when it's not needed. 36 37 The actual `content` to be converted might be a `unicode` object, 38 but it can also be the raw byte string (`str`) object, or simply 39 an object that can be `read()`. 40 41 ---- 42 NOTE: for plugin developers 43 44 The Mimeview API is quite complex and many things there are currently 45 a bit difficult to work with (e.g. what an actual `content` might be, 46 see the last paragraph of this description). 
47 48 So this area is mainly in a ''work in progress'' state, which will 49 be improved upon in the near future (see [trac:ticket:3332 #3332]). 50 51 In particular, if you are interested in writing `IContentConverter` 52 and `IHTMLPreviewRenderer` components, note that those interfaces 53 will be merged into a new style `IContentConverter`. 54 Feel free to contribute remarks and suggestions for improvements 55 to the corresponding ticket ([trac:ticket:3332 #3332]). 56 """ 57 58 import re 59 from StringIO import StringIO 60 61 from genshi import Markup, Stream 62 from genshi.core import TEXT, START, END, START_NS, END_NS 63 from genshi.builder import Fragment, tag 64 from genshi.input import HTMLParser 65 66 from trac.config import IntOption, ListOption, Option 67 from trac.core import * 68 from trac.resource import Resource 69 from trac.util import Ranges, content_disposition 70 from trac.util.text import exception_to_unicode, to_utf8, to_unicode 71 from trac.util.translation import _, tag_ 72 73 74 __all__ = ['get_mimetype', 'is_binary', 'detect_unicode', 'Mimeview', 75 'content_to_unicode', 'ct_mimetype', 'Context']

class Context(object):
    """Rendering context.

    A rendering context specifies ''how'' the content should be rendered.
    It holds together all the needed contextual information that will be
    needed by individual renderer components.

    To that end, a context keeps track of the Href instance (`.href`) which
    should be used as a base for building URLs.

    It also provides a `PermissionCache` (`.perm`) which can be used to
    restrict the output so that only the authorized information is shown.

    A rendering context may also be associated to some Trac resource which
    will be used as the implicit reference when rendering relative links
    or for retrieving relative content and can be used to retrieve related
    metadata.

    Contexts can be nested, and a new context can be created from an existing
    context using the call syntax. The previous context can be retrieved using
    the `.parent` attribute.

    For example, when rendering a wiki text of a wiki page, the context will
    be associated to a resource identifying that wiki page.

    If that wiki text contains a `[[TicketQuery]]` wiki macro, the macro will
    set up nested contexts for each matching ticket that will be used for
    rendering the ticket descriptions.

    :since: version 0.11
    """

    def __init__(self, resource, href=None, perm=None):
        """Directly create a `Context`.

        :param resource: the associated resource
        :type resource: `Resource`
        :param href: an `Href` object suitable for creating URLs
        :param perm: a `PermissionCache` object used for restricting the
                     generated output to "authorized" information only.

        The actual `.perm` attribute of the rendering context will be bound
        to the given `resource` so that fine-grained permission checks will
        apply to that.
        """
        self.parent = None #: The parent context, if any
        self.resource = resource
        self.href = href
        # Bind the permission cache to the resource when both are available;
        # fall back to the unbound `perm` otherwise (or when the bound value
        # is itself false).
        bound_perm = None
        if resource and perm:
            bound_perm = perm(resource)
        self.perm = bound_perm or perm
        # Local rendering hints; `None` means "nothing set locally, defer
        # to the ancestors".
        self._hints = None

    @classmethod
    def from_request(cls, req, resource=None, id=False, version=False,
                     parent=False, absurls=False):
        """Create a rendering context from a request.

        The `perm` and `href` properties of the context will be initialized
        from the corresponding properties of the request object.

        >>> from trac.test import Mock, MockPerm
        >>> req = Mock(href=Mock(), perm=MockPerm())
        >>> context = Context.from_request(req)
        >>> context.href is req.href
        True
        >>> context.perm is req.perm
        True

        :param      req: the HTTP request object
        :param resource: the `Resource` object or realm
        :param       id: the resource identifier
        :param  version: the resource version
        :param   parent: passed through to the `Resource` constructor as its
                         `parent` argument
        :param  absurls: whether URLs generated by the ``href`` object should
                         be absolute (including the protocol scheme and host
                         name)
        :return: a new rendering context
        :rtype: `Context`
        """
        if req:
            if absurls:
                href = req.abs_href or req.href
            else:
                href = req.href
            perm = req.perm
        else:
            href = None
            perm = None
        self = cls(Resource(resource, id=id, version=version, parent=parent),
                   href=href, perm=perm)
        # Only contexts built here (or derived from such a context through
        # `__call__`) carry a `.req` attribute.
        self.req = req
        return self

    def __repr__(self):
        """Return `<ClassName outermost - ... - innermost>` built from the
        resources found along the chain of parent contexts."""
        path = []
        context = self
        while context:
            if context.resource.realm: # skip toplevel resource
                path.append(repr(context.resource))
            context = context.parent
        return '<%s %s>' % (type(self).__name__, ' - '.join(reversed(path)))

    def __call__(self, resource=None, id=False, version=False, parent=False):
        """Create a nested rendering context.

        `self` will be the parent for the new nested context.

        :param resource: either a `Resource` object or the realm string for a
                         resource specification to be associated to the new
                         context. If `None`, the resource will be the same
                         as the resource of the parent context.
        :param id: the identifier part of the resource specification
        :param version: the version of the resource specification
        :param parent: passed through to the `Resource` constructor as its
                       `parent` argument
        :return: the new context object
        :rtype: `Context`

        >>> context = Context('wiki', 'WikiStart')
        >>> ticket1 = Resource('ticket', 1)
        >>> context('ticket', 1).resource == ticket1
        True
        >>> context(ticket1).resource is ticket1
        True
        >>> context(ticket1)().resource is ticket1
        True
        """
        if resource:
            resource = Resource(resource, id=id, version=version,
                                parent=parent)
        else:
            resource = self.resource
        context = Context(resource, href=self.href, perm=self.perm)
        context.parent = self

        # hack for context instances created by from_request()
        # this is needed because various parts of the code rely on a request
        # object being available, but that will hopefully improve in the
        # future
        if hasattr(self, 'req'):
            context.req = self.req

        return context

    def __contains__(self, resource):
        """Check whether a resource is in the rendering path.

        The primary use for this check is to avoid rendering the content of
        a resource if we're already embedded in a context associated to that
        resource.

        :param resource: a `Resource` specification which will be checked for
        :return: `True` if this context or one of its ancestors is associated
                 to a resource with the same realm and id, `False` otherwise
        """
        context = self
        while context:
            if context.resource and \
                   context.resource.realm == resource.realm and \
                   context.resource.id == resource.id:
                # we don't care about version here
                return True
            context = context.parent
        # Be explicit: direct callers get a real boolean, not `None`.
        return False

    # Rendering hints
    #
    # A rendering hint is a key/value pair that can influence renderers,
    # wiki formatters and processors in the way they produce their output.
    # The keys are strings, but the values could be anything.
    #
    # In nested contexts, the hints are inherited from their parent context,
    # unless overridden locally.

    def set_hints(self, **keyvalues):
        """Set rendering hints for this rendering context.

        The first time hints are set on a context, the hints currently
        visible through its ancestors are copied locally, then the given
        `keyvalues` are applied on top of them.

        >>> ctx = Context('timeline')
        >>> ctx.set_hints(wiki_flavor='oneliner', shorten_lines=True)
        >>> t_ctx = ctx('ticket', 1)
        >>> t_ctx.set_hints(wiki_flavor='html', preserve_newlines=True)
        >>> (t_ctx.get_hint('wiki_flavor'), t_ctx.get_hint('shorten_lines'), \
             t_ctx.get_hint('preserve_newlines'))
        ('html', True, True)
        >>> (ctx.get_hint('wiki_flavor'), ctx.get_hint('shorten_lines'), \
             ctx.get_hint('preserve_newlines'))
        ('oneliner', True, None)
        """
        if self._hints is None:
            self._hints = {}
            hints = self._parent_hints()
            if hints is not None:
                self._hints.update(hints)
        self._hints.update(keyvalues)

    def get_hint(self, hint, default=None):
        """Retrieve a rendering hint from this context or an ancestor context.

        :param hint: the name of the hint
        :param default: the value returned when the hint is not defined

        >>> ctx = Context('timeline')
        >>> ctx.set_hints(wiki_flavor='oneliner')
        >>> t_ctx = ctx('ticket', 1)
        >>> t_ctx.get_hint('wiki_flavor')
        'oneliner'
        >>> t_ctx.get_hint('preserve_newlines', True)
        True
        """
        hints = self._hints
        if hints is None:
            hints = self._parent_hints()
            if hints is None:
                return default
        return hints.get(hint, default)

    def has_hint(self, hint):
        """Test whether a rendering hint is defined in this context or in some
        ancestor context.

        >>> ctx = Context('timeline')
        >>> ctx.set_hints(wiki_flavor='oneliner')
        >>> t_ctx = ctx('ticket', 1)
        >>> t_ctx.has_hint('wiki_flavor')
        True
        >>> t_ctx.has_hint('preserve_newlines')
        False
        """
        hints = self._hints
        if hints is None:
            hints = self._parent_hints()
            if hints is None:
                return False
        return hint in hints

    def _parent_hints(self):
        """Return the hints dict of the closest ancestor which has one, or
        `None` if no ancestor has any hints set."""
        p = self.parent
        while p and p._hints is None:
            p = p.parent
        return p and p._hints

# Some common MIME types and their associated keywords and/or file extensions

KNOWN_MIME_TYPES = {
    'application/javascript':  'js',
    'application/msword':      'doc dot',
    'application/pdf':         'pdf',
    'application/postscript':  'ps',
    'application/rtf':         'rtf',
    'application/x-sh':        'sh',
    'application/x-csh':       'csh',
    'application/x-troff':     'nroff roff troff',
    'application/x-yaml':      'yml yaml',

    'application/rss+xml':     'rss',
    'application/xsl+xml':     'xsl',
    'application/xslt+xml':    'xslt',

    'image/x-icon':            'ico',
    'image/svg+xml':           'svg',

    'model/vrml':              'vrml wrl',

    'text/css':                'css',
    'text/html':               'html htm',
    'text/plain':              'txt TXT text README INSTALL '
                               'AUTHORS COPYING ChangeLog RELEASE',
    'text/xml':                'xml',

    # see also TEXT_X_TYPES below
    'text/x-csrc':             'c xs',
    'text/x-chdr':             'h',
    'text/x-c++src':           'cc CC cpp C c++ C++',
    'text/x-c++hdr':           'hh HH hpp H',
    'text/x-csharp':           'cs c# C#',
    'text/x-diff':             'patch',
    'text/x-eiffel':           'e',
    'text/x-elisp':            'el',
    'text/x-fortran':          'f',
    'text/x-haskell':          'hs',
    'text/x-ini':              'ini cfg',
    'text/x-objc':             'm mm',
    'text/x-ocaml':            'ml mli',
    'text/x-makefile':         'make mk Makefile GNUMakefile',
    'text/x-pascal':           'pas',
    'text/x-perl':             'pl pm PL',
    'text/x-php':              'php3 php4',
    'text/x-python':           'py',
    'text/x-pyrex':            'pyx',
    'text/x-ruby':             'rb',
    'text/x-scheme':           'scm',
    'text/x-textile':          'txtl',
    'text/x-vba':              'vb vba bas',
    'text/x-verilog':          'v',
    'text/x-vhdl':             'vhd',
}
# Turn each space separated keyword string into a list; for `text/x-<name>`
# types, `<name>` itself is added as a keyword as well.
# (iterate over a snapshot of the keys, as the values are replaced in place)
for t in list(KNOWN_MIME_TYPES):
    types = KNOWN_MIME_TYPES[t].split()
    if t.startswith('text/x-'):
        types.append(t[len('text/x-'):])
    KNOWN_MIME_TYPES[t] = types

# extend the above with simple (text/x-<something>: <something>) mappings

TEXT_X_TYPES = """
    ada asm asp awk idl inf java ksh lua m4 mail psp rfc rst sql tcl tex zsh
"""
for x in TEXT_X_TYPES.split():
    KNOWN_MIME_TYPES.setdefault('text/x-%s' % x, []).append(x)


# Default mapping from keywords/extensions to known MIME types:
# (each MIME type is also mapped to itself)

MIME_MAP = {}
for t, exts in KNOWN_MIME_TYPES.items():
    MIME_MAP[t] = t
    for e in exts:
        MIME_MAP[e] = t

# Simple builtin autodetection from the content using a regexp
#
# Exactly one of the four groups is set on a match, depending on which
# alternative matched.
#
# Note on alternative 2: inside the character class, `.-_` is a *range*
# (from '.' to '_'), which doesn't contain the '-' character itself, so
# the interpreter of `#!/opt/my-tools/bin/ruby` used to be detected as
# `my`. A literal `\-` has been added to skip over such directory names.
# The range is kept as is, in order to not change what was accepted so
# far (e.g. the ':' of `#!c:/python27/python.exe`).
MODE_RE = re.compile(r"""
      \#!.+?env\s+(\w+)                     # 1. look for shebang with env
    | \#!(?:[/\w.-_\-]+/)?(\w+)             # 2. look for regular shebang
    | -\*-\s*(?:mode:\s*)?([\w+-]+)\s*-\*-  # 3. look for Emacs' -*- mode -*-
    | vim:.*?(?:syntax|filetype|ft)=(\w+)   # 4. look for VIM's syntax=<n>
    """, re.VERBOSE)

def get_mimetype(filename, content=None, mime_map=MIME_MAP):
    """Guess the most probable MIME type of a file with the given name.

    `filename` is either a filename (the lookup will then use the suffix)
    or some arbitrary keyword. `None` is accepted and handled like an
    empty name, so that the detection can still be based on the `content`.

    `content` is either a `str` or an `unicode` string.

    `mime_map` is the keyword/extension to MIME type mapping to use.

    The following strategies are tried in turn:
     1. lookup of the suffix of `filename` in `mime_map`
     2. guess from `filename` by the standard `mimetypes` module
     3. lookup in `mime_map` of the mode found in the `content` by `MODE_RE`
     4. 'application/octet-stream' if `is_binary(content)`

    Return the MIME type, or `None` if it couldn't be determined.
    """
    # Callers such as `Mimeview.render` default their `filename` to None
    filename = filename or ''
    suffix = filename.split('.')[-1]
    if suffix in mime_map:
        # 1) mimetype from the suffix, using the `mime_map`
        return mime_map[suffix]

    mimetype = None
    try:
        import mimetypes
        # 2) mimetype from the suffix, using the `mimetypes` module
        mimetype = mimetypes.guess_type(filename)[0]
    except Exception:
        # best effort only: go on with the content based detection
        pass
    if not mimetype and content:
        # Only the start and the end of the content are examined; short
        # content is taken as a whole, rather than glued to itself.
        if len(content) > 2000:
            sample = content[:1000] + content[-1000:]
        else:
            sample = content
        match = MODE_RE.search(sample)
        if match:
            # only the Emacs mode (group 3) gets lowercased
            mode = match.group(1) or match.group(2) or match.group(4) or \
                match.group(3).lower()
            if mode in mime_map:
                # 3) mimetype from the content, using the `MODE_RE`
                return mime_map[mode]
        else:
            if is_binary(content):
                # 4) mimetype from the content, using`is_binary`
                return 'application/octet-stream'
    return mimetype

425

def ct_mimetype(content_type):
    """Return the mimetype part of a content type.

    This is what precedes the first ';' in `content_type`, stripped from
    any surrounding whitespace. An empty or missing `content_type` gives
    an empty string.
    """
    if not content_type:
        return ''
    head = content_type.split(';', 1)[0]
    return head.strip()

429

def is_binary(data):
    """Detect binary content by checking the first thousand bytes for zeroes.

    Operate on either `str` or `unicode` strings.

    A `str` for which `detect_unicode` finds a BOM is never considered to
    be binary, whatever it contains.
    """
    starts_with_bom = isinstance(data, str) and detect_unicode(data)
    if starts_with_bom:
        return False
    head = data[:1000]
    return '\0' in head

438

def detect_unicode(data):
    """Detect different unicode charsets by looking for BOMs (Byte Order Mark).

    Operate obviously only on `str` objects.

    Return 'utf-16-le', 'utf-16-be' or 'utf-8' according to the BOM `data`
    starts with, or `None` if there's no such BOM.
    """
    known_boms = (
        ('\xff\xfe', 'utf-16-le'),
        ('\xfe\xff', 'utf-16-be'),
        ('\xef\xbb\xbf', 'utf-8'),
    )
    for bom, charset in known_boms:
        if data.startswith(bom):
            return charset
    return None

452

def content_to_unicode(env, content, mimetype):
    """Retrieve an `unicode` object from a `content` to be previewed.

    If `content` has a `read` method, at most `max_preview_size` (as
    configured for the `Mimeview` component of `env`) is read from it.
    The conversion itself is done by `Mimeview.to_unicode`.

    In case the raw content had an unicode BOM, we remove it.

    >>> from trac.test import EnvironmentStub
    >>> env = EnvironmentStub()
    >>> content_to_unicode(env, u"\ufeffNo BOM! h\u00e9 !", '')
    u'No BOM! h\\xe9 !'
    >>> content_to_unicode(env, "\xef\xbb\xbfNo BOM! h\xc3\xa9 !", '')
    u'No BOM! h\\xe9 !'

    """
    viewer = Mimeview(env)
    if hasattr(content, 'read'):
        content = content.read(viewer.max_preview_size)
    text = viewer.to_unicode(content, mimetype)
    if text and text[0] == u'\ufeff':
        return text[1:]
    return text

473

class IHTMLPreviewRenderer(Interface):
    """Extension point interface for components that add HTML renderers of
    specific content types to the `Mimeview` component.

    ----
    This interface will be merged with IContentConverter, as conversion
    to text/html will be simply a particular type of content conversion.

    However, note that the IHTMLPreviewRenderer will still be supported
    for a while through an adapter, whereas the IContentConverter interface
    itself will be changed.

    So if all you want to do is convert to HTML and don't feel like
    following the API changes, you should rather implement this
    interface for the time being.
    ----
    """

    # implementing classes should set this property to True if they
    # support text content where Trac should expand tabs into spaces
    # (the `content` given to `render` is then an `unicode` string in which
    # the tabs have already been expanded)
    expand_tabs = False

    # indicate whether the output of this renderer is source code that can
    # be decorated with annotations
    returns_source = False

    def get_quality_ratio(mimetype):
        """Return the level of support this renderer provides for the `content`
        of the specified MIME type. The return value must be a number between
        0 and 9, where 0 means no support and 9 means "perfect" support.

        Renderers are tried in decreasing order of quality ratio, and those
        returning 0 are not tried at all.
        """

    def render(context, mimetype, content, filename=None, url=None):
        """Render an XHTML preview of the raw `content` within a Context.

        The `content` might be:
         * a `str` object
         * an `unicode` string
         * any object with a `read` method, returning one of the above

        It is assumed that the content will correspond to the given `mimetype`.

        Besides the `content` value, the same content may eventually
        be available through the `filename` or `url` parameters.
        This is useful for renderers that embed objects, using <object> or
        <img> instead of including the content inline.

        Can return the generated XHTML text as a single string or as an
        iterable that yields strings. In the latter case, the list will
        be considered to correspond to lines of text in the original content.

        If the returned value is false (e.g. `None` or an empty string),
        the next available renderer will be tried.
        """

526

class IHTMLPreviewAnnotator(Interface):
    """Extension point interface for components that can annotate an XHTML
    representation of file contents with additional information.

    The annotations are shown as additional columns, in front of the
    column containing the lines of the content.
    """

    def get_annotation_type():
        """Return a (type, label, description) tuple
        that defines the type of annotation and provides human readable names.
        The `type` element should be unique to the annotator.
        The `label` element is used as column heading for the table,
        while `description` is used as a display name to let the user
        toggle the appearance of the annotation type.
        """

    def get_annotation_data(context):
        """Return some metadata to be used by the `annotate_row` method below.

        This will be called only once, before lines are processed.
        If this raises a `TracError`, that annotator won't be used and
        an empty cell will be added to each row instead.
        """

    def annotate_row(context, row, number, line, data):
        """Add to `row` the XHTML markup for the table cell that contains
        the annotation data.

        `context` is the context corresponding to the content being annotated,
        `row` is the tr Element being built, `number` is the line number being
        processed (starting at 1) and `line` is the line's actual content.
        `data` is whatever additional data the `get_annotation_data` method
        decided to provide.

        The value returned by this method is not used.
        """

558

class IContentConverter(Interface):
    """An extension point interface for generic MIME based content
    conversion.

    ----
    NOTE: This api will likely change in the future, e.g.:

    def get_supported_conversions(input):
        '''Tells whether this converter can handle this `input` type.

        Return an iterable of `Conversion` objects, each describing
        how the conversion should be done and what will be the output type.
        '''

    def convert_content(context, conversion, content):
        '''Convert the given `AbstractContent` as specified by `Conversion`.

        The conversion takes place in the given rendering context.

        Return the converted content, which ''must'' be a `MimeContent` object.
        '''
    ----
    """

    def get_supported_conversions():
        """Return an iterable of tuples in the form (key, name, extension,
        in_mimetype, out_mimetype, quality) representing the MIME conversions
        supported and
        the quality ratio of the conversion in the range 0 to 9, where 0 means
        no support and 9 means "perfect" support. eg. ('latex', 'LaTeX', 'tex',
        'text/x-trac-wiki', 'text/plain', 8)

        Returning `None` is the same as returning an empty iterable.
        """

    def convert_content(req, mimetype, content, key):
        """Convert the given content from mimetype to the output MIME type
        represented by key. Returns a tuple in the form (content,
        output_mime_type) or None if conversion is not possible.

        The `key` is the first element of one of the tuples returned by
        `get_supported_conversions`.
        """

596

class Content(object):
    """A lazy file-like object that only reads `input` if necessary.

    At most `max_size` is read from `input`, once, upon the first `read`
    of a non-zero size. This data is then kept in memory, so that it can
    be read again after a `reset`.
    """

    def __init__(self, input, max_size):
        self.content = None # in-memory copy, created when first needed
        self.max_size = max_size
        self.input = input

    def _buffered(self):
        """Return the in-memory copy of the data, after having read it from
        `input` if this was not done yet."""
        buf = self.content
        if buf is None:
            buf = self.content = StringIO(self.input.read(self.max_size))
        return buf

    def read(self, size=-1):
        """Read `size` from the content (everything left by default)."""
        if size != 0:
            return self._buffered().read(size)
        # no need to access the `input` when nothing is asked for
        return ''

    def reset(self):
        """Rewind, so that the next `read` restarts from the beginning."""
        buf = self.content
        if buf is None:
            return
        buf.seek(0)

615

616 617 -class Mimeview(Component):

618 """Generic HTML renderer for data, typically source code.""" 619 620 required = True 621 622 renderers = ExtensionPoint(IHTMLPreviewRenderer) 623 annotators = ExtensionPoint(IHTMLPreviewAnnotator) 624 converters = ExtensionPoint(IContentConverter) 625 626 default_charset = Option('trac', 'default_charset', 'iso-8859-15', 627 """Charset to be used when in doubt.""") 628 629 tab_width = IntOption('mimeviewer', 'tab_width', 8, 630 """Displayed tab width in file preview. (''since 0.9'')""") 631 632 max_preview_size = IntOption('mimeviewer', 'max_preview_size', 262144, 633 """Maximum file size for HTML preview. (''since 0.9'')""") 634 635 mime_map = ListOption('mimeviewer', 'mime_map', 636 'text/x-dylan:dylan,text/x-idl:ice,text/x-ada:ads:adb', 637 doc="""List of additional MIME types and keyword mappings. 638 Mappings are comma-separated, and for each MIME type, 639 there's a colon (":") separated list of associated keywords 640 or file extensions. (''since 0.10'')""") 641 642 treat_as_binary = ListOption('mimeviewer', 'treat_as_binary', 643 'application/octet-stream,application/pdf,application/postscript,' 644 'application/msword,application/rtf,', 645 doc="""Comma-separated list of MIME types that should be treated as 646 binary data. (''since 0.11.5'')""") 647

648 - def __init__(self):

649 self._mime_map = None

650 651 # Public API 652

653 - def get_supported_conversions(self, mimetype):

654 """Return a list of target MIME types in same form as 655 `IContentConverter.get_supported_conversions()`, but with the converter 656 component appended. Output is ordered from best to worst quality.""" 657 converters = [] 658 for converter in self.converters: 659 conversions = converter.get_supported_conversions() or [] 660 for k, n, e, im, om, q in conversions: 661 if im == mimetype and q > 0: 662 converters.append((k, n, e, im, om, q, converter)) 663 converters = sorted(converters, key=lambda i: i[-2], reverse=True) 664 return converters

665

666 - def convert_content(self, req, mimetype, content, key, filename=None, 667 url=None):

668 """Convert the given content to the target MIME type represented by 669 `key`, which can be either a MIME type or a key. Returns a tuple of 670 (content, output_mime_type, extension).""" 671 if not content: 672 return ('', 'text/plain;charset=utf-8', '.txt') 673 674 # Ensure we have a MIME type for this content 675 full_mimetype = mimetype 676 if not full_mimetype: 677 if hasattr(content, 'read'): 678 content = content.read(self.max_preview_size) 679 full_mimetype = self.get_mimetype(filename, content) 680 if full_mimetype: 681 mimetype = ct_mimetype(full_mimetype) # split off charset 682 else: 683 mimetype = full_mimetype = 'text/plain' # fallback if not binary 684 685 # Choose best converter 686 candidates = list(self.get_supported_conversions(mimetype) or []) 687 candidates = [c for c in candidates if key in (c[0], c[4])] 688 if not candidates: 689 raise TracError( 690 _("No available MIME conversions from %(old)s to %(new)s", 691 old=mimetype, new=key)) 692 693 # First successful conversion wins 694 for ck, name, ext, input_mimettype, output_mimetype, quality, \ 695 converter in candidates: 696 output = converter.convert_content(req, mimetype, content, ck) 697 if output: 698 return (output[0], output[1], ext) 699 raise TracError( 700 _("No available MIME conversions from %(old)s to %(new)s", 701 old=mimetype, new=key))

702

703 - def get_annotation_types(self):

704 """Generator that returns all available annotation types.""" 705 for annotator in self.annotators: 706 yield annotator.get_annotation_type()

707

708 - def render(self, context, mimetype, content, filename=None, url=None, 709 annotations=None, force_source=False):

710 """Render an XHTML preview of the given `content`. 711 712 `content` is the same as an `IHTMLPreviewRenderer.render`'s 713 `content` argument. 714 715 The specified `mimetype` will be used to select the most appropriate 716 `IHTMLPreviewRenderer` implementation available for this MIME type. 717 If not given, the MIME type will be infered from the filename or the 718 content. 719 720 Return a string containing the XHTML text. 721 722 When rendering with an `IHTMLPreviewRenderer` fails, a warning is added 723 to the request associated with the context (if any), unless the 724 `disable_warnings` hint is set to `True`. 725 """ 726 if not content: 727 return '' 728 if not isinstance(context, Context): 729 # backwards compatibility: the first argument used to be the 730 # request prior to 0.11 731 context = Context.from_request(context) 732 733 # Ensure we have a MIME type for this content 734 full_mimetype = mimetype 735 if not full_mimetype: 736 if hasattr(content, 'read'): 737 content = content.read(self.max_preview_size) 738 full_mimetype = self.get_mimetype(filename, content) 739 if full_mimetype: 740 mimetype = ct_mimetype(full_mimetype) # split off charset 741 else: 742 mimetype = full_mimetype = 'text/plain' # fallback if not binary 743 744 # Determine candidate `IHTMLPreviewRenderer`s 745 candidates = [] 746 for renderer in self.renderers: 747 qr = renderer.get_quality_ratio(mimetype) 748 if qr > 0: 749 candidates.append((qr, renderer)) 750 candidates.sort(lambda x, y: cmp(y[0], x[0])) 751 752 # Wrap file-like object so that it can be read multiple times 753 if hasattr(content, 'read'): 754 content = Content(content, self.max_preview_size) 755 756 # First candidate which renders successfully wins. 757 # Also, we don't want to expand tabs more than once. 
758 expanded_content = None 759 for qr, renderer in candidates: 760 if force_source and not getattr(renderer, 'returns_source', False): 761 continue # skip non-source renderers in force_source mode 762 if isinstance(content, Content): 763 content.reset() 764 try: 765 ann_names = annotations and ', '.join(annotations) or \ 766 'no annotations' 767 self.log.debug('Trying to render HTML preview using %s [%s]', 768 renderer.__class__.__name__, ann_names) 769 770 # check if we need to perform a tab expansion 771 rendered_content = content 772 if getattr(renderer, 'expand_tabs', False): 773 if expanded_content is None: 774 content = content_to_unicode(self.env, content, 775 full_mimetype) 776 expanded_content = content.expandtabs(self.tab_width) 777 rendered_content = expanded_content 778 779 result = renderer.render(context, full_mimetype, 780 rendered_content, filename, url) 781 if not result: 782 continue 783 784 if not (force_source or getattr(renderer, 'returns_source', 785 False)): 786 # Direct rendering of content 787 if isinstance(result, basestring): 788 if not isinstance(result, unicode): 789 result = to_unicode(result) 790 return Markup(to_unicode(result)) 791 elif isinstance(result, Fragment): 792 return result.generate() 793 else: 794 return result 795 796 # Render content as source code 797 if annotations: 798 m = context.req and context.req.args.get('marks') or None 799 return self._render_source(context, result, annotations, 800 m and Ranges(m)) 801 else: 802 if isinstance(result, list): 803 result = Markup('\n').join(result) 804 return tag.div(class_='code')(tag.pre(result)).generate() 805 806 except Exception, e: 807 self.log.warning('HTML preview using %s failed: %s', 808 renderer.__class__.__name__, 809 exception_to_unicode(e, traceback=True)) 810 if context.req and not context.get_hint('disable_warnings'): 811 from trac.web.chrome import add_warning 812 add_warning(context.req, 813 _("HTML preview using %(renderer)s failed (%(err)s)", 814 
renderer=renderer.__class__.__name__, 815 err=exception_to_unicode(e)))

816

817 - def _render_source(self, context, stream, annotations, marks=None):

818 from trac.web.chrome import add_warning 819 annotators, labels, titles = {}, {}, {} 820 for annotator in self.annotators: 821 atype, alabel, atitle = annotator.get_annotation_type() 822 if atype in annotations: 823 labels[atype] = alabel 824 titles[atype] = atitle 825 annotators[atype] = annotator 826 annotations = [a for a in annotations if a in annotators] 827 828 if isinstance(stream, list): 829 stream = HTMLParser(StringIO('\n'.join(stream))) 830 elif isinstance(stream, unicode): 831 text = stream 832 def linesplitter(): 833 for line in text.splitlines(True): 834 yield TEXT, line, (None, -1, -1)

835 stream = linesplitter() 836 837 annotator_datas = [] 838 for a in annotations: 839 annotator = annotators[a] 840 try: 841 data = (annotator, annotator.get_annotation_data(context)) 842 except TracError, e: 843 self.log.warning("Can't use annotator '%s': %s", a, e.message) 844 add_warning(context.req, tag.strong( 845 tag_("Can't use %(annotator)s annotator: %(error)s", 846 annotator=tag.em(a), error=tag.pre(e.message)))) 847 data = (None, None) 848 annotator_datas.append(data) 849 850 def _head_row(): 851 return tag.tr( 852 [tag.th(labels[a], class_=a, title=titles[a]) 853 for a in annotations] + 854 [tag.th(u'\xa0', class_='content')] 855 )

856 857 def _body_rows(): 858 for idx, line in enumerate(_group_lines(stream)): 859 row = tag.tr() 860 if marks and idx + 1 in marks: 861 row(class_='hilite') 862 for annotator, data in annotator_datas: 863 if annotator: 864 annotator.annotate_row(context, row, idx+1, line, data) 865 else: 866 row.append(tag.td()) 867 row.append(tag.td(line)) 868 yield row 869 870 return tag.table(class_='code')( 871 tag.thead(_head_row()), 872 tag.tbody(_body_rows()) 873 ) 874

875 - def get_max_preview_size(self):

876 """Deprecated: use `max_preview_size` attribute directly.""" 877 return self.max_preview_size

878

879 - def get_charset(self, content='', mimetype=None):

880 """Infer the character encoding from the `content` or the `mimetype`. 881 882 `content` is either a `str` or an `unicode` object. 883 884 The charset will be determined using this order: 885 * from the charset information present in the `mimetype` argument 886 * auto-detection of the charset from the `content` 887 * the configured `default_charset` 888 """ 889 if mimetype: 890 ctpos = mimetype.find('charset=') 891 if ctpos >= 0: 892 return mimetype[ctpos + 8:].strip() 893 if isinstance(content, str): 894 utf = detect_unicode(content) 895 if utf is not None: 896 return utf 897 return self.default_charset

898 899 @property

900 - def mime_map(self):

901 # Extend default extension to MIME type mappings with configured ones 902 if not self._mime_map: 903 self._mime_map = MIME_MAP.copy() 904 for mapping in self.config['mimeviewer'].getlist('mime_map'): 905 if ':' in mapping: 906 assocations = mapping.split(':') 907 for keyword in assocations: # Note: [0] kept on purpose 908 self._mime_map[keyword] = assocations[0] 909 return self._mime_map

910

def get_mimetype(self, filename, content=None):
    """Infer the MIME type from the `filename` or the `content`.

    `content` is either a `str` or an `unicode` object.

    The detection itself is delegated to the module-level
    `get_mimetype` function, using this component's `mime_map`.
    When a MIME type is found and it does not already mention a
    charset, the charset obtained from `get_charset()` is appended
    (i.e. "<mimetype>; charset=...").

    Return the detected MIME type, or a false value (as returned by
    the module-level function) if detection failed.
    """
    mimetype = get_mimetype(filename, content, self.mime_map)
    if not mimetype:
        return mimetype
    charset = self.get_charset(content, mimetype)
    if charset and 'charset' not in mimetype:
        mimetype += '; charset=' + charset
    return mimetype

928

def is_binary(self, mimetype=None, filename=None, content=None):
    """Check if a file must be considered as binary.

    If no `mimetype` is given but a `filename` is, the MIME type is
    looked up with `get_mimetype()`. The file is binary when its MIME
    type (reduced with `ct_mimetype`) is listed in `treat_as_binary`,
    or when `content` is given and the module-level `is_binary` check
    says so.
    """
    if filename and not mimetype:
        mimetype = self.get_mimetype(filename, content)
    if mimetype:
        mimetype = ct_mimetype(mimetype)
        if mimetype in self.treat_as_binary:
            return True
    if content is None:
        return False
    if is_binary(content):
        return True
    return False

940

def to_utf8(self, content, mimetype=None):
    """Convert an encoded `content` to utf-8.

    The source charset is determined with `get_charset()` and the
    conversion is done by the module-level `to_utf8` function.

    ''Deprecated in 0.10. You should use `unicode` strings only.''
    """
    charset = self.get_charset(content, mimetype)
    return to_utf8(content, charset)

947

def to_unicode(self, content, mimetype=None, charset=None):
    """Convert `content` (an encoded `str` object) to an `unicode` object.

    The module-level `to_unicode` function is called with the given
    `charset` if there is one, otherwise with the charset obtained
    from `Mimeview.get_charset()` for the `content` and `mimetype`.
    """
    if charset:
        return to_unicode(content, charset)
    return to_unicode(content, self.get_charset(content, mimetype))

957

def configured_modes_mapping(self, renderer):
    """Return a MIME type to `(mode, quality)` mapping for `renderer`.

    The mapping is read from the `<renderer>_modes` option of the
    `[mimeviewer]` section. Each non-empty entry must have the form
    `mimetype:mode:quality`, where `quality` is an integer. Entries
    that can't be parsed are skipped and a warning is logged.
    """
    option = '%s_modes' % renderer
    types = {}
    for mapping in self.config['mimeviewer'].getlist(option):
        if mapping:
            try:
                mimetype, mode, quality = mapping.split(':')
                quality = int(quality)
            except (TypeError, ValueError):
                self.log.warning("Invalid mapping '%s' specified in '%s' "
                                 "option.", mapping, option)
            else:
                types[mimetype] = (mode, quality)
    return types

971

def preview_data(self, context, content, length, mimetype, filename,
                 url=None, annotations=None, force_source=False):
    """Prepares a rendered preview of the given `content`.

    Return a dict with the keys `raw_href` (the `url`), `size` (the
    `length`), `max_file_size`, `max_file_size_reached` and `rendered`.
    The content is only passed to `render()` when `length` is below
    `max_preview_size`; otherwise `rendered` stays `None` and
    `max_file_size_reached` is set.

    Note: `content` will usually be an object with a `read` method.
    """
    too_big = length >= self.max_preview_size
    rendered = None
    if not too_big:
        rendered = self.render(context, mimetype, content, filename, url,
                               annotations, force_source=force_source)
    return {
        'raw_href': url,
        'size': length,
        'max_file_size': self.max_preview_size,
        'max_file_size_reached': too_big,
        'rendered': rendered,
    }

990

def send_converted(self, req, in_type, content, selector, filename='file'):
    """Helper method for converting `content` and sending it directly.

    The content is converted with `convert_content()`, encoded to utf-8
    if the result is an `unicode` object, and written to `req` with a
    200 response. When `filename` is set, a `Content-Disposition`
    attachment header named `<filename>.<ext>` is sent as well.
    Always ends by raising `RequestDone`.

    `selector` can be either a key or a MIME Type.
    """
    from trac.web.api import RequestDone
    converted = self.convert_content(req, in_type, content, selector)
    content, output_type, ext = converted
    if isinstance(content, unicode):
        content = content.encode('utf-8')
    req.send_response(200)
    req.send_header('Content-Type', output_type)
    req.send_header('Content-Length', len(content))
    if filename:
        download_name = '%s.%s' % (filename, ext)
        disposition = content_disposition('attachment', download_name)
        req.send_header('Content-Disposition', disposition)
    req.end_headers()
    req.write(content)
    raise RequestDone

1010

1011 1012 -def _group_lines(stream):

1013 space_re = re.compile('(?P<spaces> (?: +))|^(?P<tag><\w+.*?>)?( )') 1014 def pad_spaces(match): 1015 m = match.group('spaces') 1016 if m: 1017 div, mod = divmod(len(m), 2) 1018 return div * u'\xa0 ' + mod * u'\xa0' 1019 return (match.group('tag') or '') + u'\xa0'

1020 1021 def _generate(): 1022 stack = [] 1023 def _reverse(): 1024 for event in reversed(stack): 1025 if event[0] is START: 1026 yield END, event[1][0], event[2] 1027 else: 1028 yield END_NS, event[1][0], event[2] 1029 1030 for kind, data, pos in stream: 1031 if kind is TEXT: 1032 lines = data.split('\n') 1033 if lines: 1034 # First element 1035 for e in stack: 1036 yield e 1037 yield kind, lines.pop(0), pos 1038 for e in _reverse(): 1039 yield e 1040 # Subsequent ones, prefix with \n 1041 for line in lines: 1042 yield TEXT, '\n', pos 1043 for e in stack: 1044 yield e 1045 yield kind, line, pos 1046 for e in _reverse(): 1047 yield e 1048 else: 1049 if kind is START or kind is START_NS: 1050 stack.append((kind, data, pos)) 1051 elif kind is END or kind is END_NS: 1052 stack.pop() 1053 else: 1054 yield kind, data, pos 1055 1056 buf = [] 1057 1058 # Fix the \n at EOF. 1059 if not isinstance(stream, list): 1060 stream = list(stream) 1061 found_text = False 1062 1063 for i in range(len(stream)-1, -1, -1): 1064 if stream[i][0] is TEXT: 1065 e = stream[i] 1066 # One chance to strip a \n 1067 if not found_text and e[1].endswith('\n'): 1068 stream[i] = (e[0], e[1][:-1], e[2]) 1069 if len(e[1]): 1070 found_text = True 1071 break 1072 if not found_text: 1073 raise StopIteration 1074 1075 for kind, data, pos in _generate(): 1076 if kind is TEXT and data == '\n': 1077 yield Stream(buf[:]) 1078 del buf[:] 1079 else: 1080 if kind is TEXT: 1081 data = space_re.sub(pad_spaces, data) 1082 buf.append((kind, data, pos)) 1083 if buf: 1084 yield Stream(buf[:]) 1085

# -- Default annotators

class LineNumberAnnotator(Component):
    """Text annotator that adds a column with line numbers.

    Each row gets a header cell whose id is `L<lineno>` and which
    contains a link to that same anchor.
    """
    implements(IHTMLPreviewAnnotator)

    # IHTMLPreviewAnnotator methods

    def get_annotation_type(self):
        """Return the `'lineno'` type with its translated labels."""
        kind = 'lineno'
        label = _('Line')
        description = _('Line numbers')
        return kind, label, description

    def get_annotation_data(self, context):
        """No per-rendering data is needed for numbering lines."""
        return None

    def annotate_row(self, context, row, lineno, line, data):
        """Append the line number cell for `lineno` to `row`."""
        anchor = 'L%s' % lineno
        cell = tag.th(id=anchor)
        row.append(cell(tag.a(lineno, href='#' + anchor)))

1105

# -- Default renderers

class PlainTextRenderer(Component):
    """HTML preview renderer for plain text, and fallback for any kind of text
    for which no more specific renderer is available.
    """
    implements(IHTMLPreviewRenderer)

    # Renderer flags; presumably consulted by the code that invokes the
    # renderers (not visible in this part of the file) -- TODO confirm.
    expand_tabs = True
    returns_source = True

    def get_quality_ratio(self, mimetype):
        """Return 1, or 0 for MIME types configured as binary."""
        binary_types = Mimeview(self.env).treat_as_binary
        if mimetype not in binary_types:
            return 1
        return 0

    def render(self, context, mimetype, content, filename=None, url=None):
        """Return `content` as unicode text, or `None` for binary data."""
        if not is_binary(content):
            self.log.debug("Using default plain text mimeviewer")
            return content_to_unicode(self.env, content, mimetype)
        self.log.debug("Binary data; no preview available")
        return

1130

class ImageRenderer(Component):
    """Inline image display.

    This component doesn't need the `content` at all.
    """
    implements(IHTMLPreviewRenderer)

    def get_quality_ratio(self, mimetype):
        """Return 8 for any `image/*` MIME type, 0 otherwise."""
        if not mimetype.startswith('image/'):
            return 0
        return 8

    def render(self, context, mimetype, content, filename=None, url=None):
        """Return an `<img>` pointing to `url`, or `None` without `url`."""
        if not url:
            return None
        image = tag.img(src=url, alt=filename)
        return tag.div(image, class_='image-file')

1148

class WikiTextRenderer(Component):
    """HTML renderer for files containing Trac's own Wiki formatting markup."""
    implements(IHTMLPreviewRenderer)

    def get_quality_ratio(self, mimetype):
        """Return 8 for the Trac wiki MIME types, 0 otherwise."""
        wiki_types = ('text/x-trac-wiki', 'application/x-trac-wiki')
        if mimetype not in wiki_types:
            return 0
        return 8

    def render(self, context, mimetype, content, filename=None, url=None):
        """Convert `content` to unicode and format it as wiki text."""
        from trac.wiki.formatter import format_to_html
        text = content_to_unicode(self.env, content, mimetype)
        return format_to_html(self.env, context, text)

1163

Source Code for Module trac.mimeview.api