# -*- coding: utf-8 -*-

import __builtin__
import locale
import os
import re
import sys
import textwrap
from urllib import quote, quote_plus, unquote
from unicodedata import east_asian_width

import jinja2

CRLF = '\r\n'


class Empty(unicode):
    """A special tag object evaluating to the empty string"""
    __slots__ = []

empty = Empty()

del Empty


def jinja2env(**kwargs):
    """Creates a Jinja2 ``Environment`` configured with Trac conventions.

    All default parameters can optionally be overridden. The ``loader``
    parameter is not set by default, so unless it is set by the
    caller, only inline templates can be created from the environment.

    :rtype: `jinja2.Environment`

    """
    exts = ('.html', '.rss', '.xml')
    def filterout_none(v):
        return '' if v is None else v
    def autoescape_extensions(template):
        return template and template.endswith(exts)
    defaults = dict(
        variable_start_string='${',
        variable_end_string='}',
        line_statement_prefix='#',
        line_comment_prefix='##',
        trim_blocks=True,
        lstrip_blocks=True,
        extensions=['jinja2.ext.do', 'jinja2.ext.i18n', 'jinja2.ext.with_'],
        finalize=filterout_none,
        autoescape=autoescape_extensions,
    )
    defaults.update(kwargs)
    jenv = jinja2.Environment(**defaults)
    jenv.globals.update(
        len=len,
    )
    return jenv


def jinja2template(template, text=False, **kwargs):
    """Creates a Jinja2 ``Template`` from inlined source.

    :param template: the template content
    :param text: if set to `False`, the result of the variable
                 expansion will be XML/HTML escaped
    :param kwargs: additional arguments to pass to `jinja2env`. See
                   `jinja2.Environment` for supported arguments.
    """
    return jinja2env(autoescape=not text, **kwargs).from_string(template)
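
# Illustrative usage of the inline-template helper above (expected values
# assume the module defaults, e.g. the '${...}' variable syntax):
#
#   tmpl = jinja2template(u'Hello ${name}!', text=True)
#   tmpl.render(name=u'<World>')   # -> u'Hello <World>!'  (no escaping)
#   tmpl = jinja2template(u'Hello ${name}!')
#   tmpl.render(name=u'<World>')   # -> u'Hello &lt;World&gt;!'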


def to_unicode(text, charset=None):
    """Convert input to an `unicode` object.

    For a `str` object, we'll first try to decode the bytes using the given
    `charset` encoding (or UTF-8 if none is specified), then we fall back to
    the latin1 encoding which might be correct or not, but at least preserves
    the original byte sequence by mapping each byte to the corresponding
    unicode code point in the range U+0000 to U+00FF.

    For anything else, a simple `unicode()` conversion is attempted,
    with special care taken with `Exception` objects.
    """
    if isinstance(text, str):
        try:
            return unicode(text, charset or 'utf-8')
        except UnicodeDecodeError:
            return unicode(text, 'latin1')
    elif isinstance(text, Exception):
        if os.name == 'nt' and isinstance(text, EnvironmentError):
            # environment errors on Windows may carry strings encoded with
            # the ANSI codepage ('mbcs')
            strerror = text.strerror
            filename = text.filename
            if isinstance(strerror, basestring) and \
                    isinstance(filename, basestring):
                try:
                    if not isinstance(strerror, unicode):
                        strerror = unicode(strerror, 'mbcs')
                    if not isinstance(filename, unicode):
                        filename = unicode(filename, 'mbcs')
                except UnicodeError:
                    pass
                else:
                    if isinstance(text, WindowsError):
                        return u"[Error %s] %s: '%s'" % (text.winerror,
                                                         strerror, filename)
                    else:
                        return u"[Errno %s] %s: '%s'" % (text.errno, strerror,
                                                         filename)
            # fall back to decoding the whole message with the ANSI codepage
            try:
                return unicode(str(text), 'mbcs')
            except UnicodeError:
                pass
        try:
            # the exception message may already be convertible to unicode
            return unicode(text)
        except UnicodeError:
            # otherwise, convert each argument of the exception separately
            return ' '.join(to_unicode(arg) for arg in text.args)
    return unicode(text)
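
# Expected behaviour of to_unicode(), for illustration (Python 2 literal
# notation; ValueError is not an EnvironmentError, so the Windows-specific
# branch is skipped):
#
#   to_unicode('caf\xc3\xa9')        # -> u'caf\xe9'   (valid UTF-8 input)
#   to_unicode('caf\xe9')            # -> u'caf\xe9'   (latin1 fallback)
#   to_unicode(ValueError('boom'))   # -> u'boom'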


def exception_to_unicode(e, traceback=False):
    """Convert an `Exception` to an `unicode` object.

    In addition to `to_unicode`, this representation of the exception
    also contains the class name and optionally the traceback.
    """
    message = '%s: %s' % (e.__class__.__name__, to_unicode(e))
    if traceback:
        from trac.util import get_last_traceback
        traceback_only = get_last_traceback().split('\n')[:-2]
        message = '\n%s\n%s' % (to_unicode('\n'.join(traceback_only)), message)
    return message


def path_to_unicode(path):
    """Convert a filesystem path to unicode, using the filesystem encoding."""
    if isinstance(path, str):
        try:
            return unicode(path, sys.getfilesystemencoding())
        except UnicodeDecodeError:
            return unicode(path, 'latin1')
    return unicode(path)


_ws_leading_re = re.compile(u'\\A[\\s\u200b]+', re.UNICODE)
_ws_trailing_re = re.compile(u'[\\s\u200b]+\\Z', re.UNICODE)


def stripws(text, leading=True, trailing=True):
    """Strips unicode white-spaces and ZWSPs from ``text``.

    :param leading: strips leading spaces from ``text`` unless ``leading`` is
                    `False`.
    :param trailing: strips trailing spaces from ``text`` unless ``trailing``
                     is `False`.
    """
    if leading:
        text = _ws_leading_re.sub('', text)
    if trailing:
        text = _ws_trailing_re.sub('', text)
    return text
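
# For illustration, stripws() removes Unicode whitespace and zero-width
# spaces (U+200B) from either end only:
#
#   stripws(u'\u200b  foo  \u200b')        # -> u'foo'
#   stripws(u'  foo  ', trailing=False)    # -> u'foo  '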


def strip_line_ws(text, leading=True, trailing=True):
    """Strips unicode white-spaces and ZWSPs from each line of ``text``.

    :param leading: strips leading spaces from ``text`` unless ``leading`` is
                    `False`.
    :param trailing: strips trailing spaces from ``text`` unless ``trailing``
                     is `False`.
    """
    lines = re.compile(r'(\n|\r\n|\r)').split(text)
    if leading:
        lines[::2] = (_ws_leading_re.sub('', line) for line in lines[::2])
    if trailing:
        lines[::2] = (_ws_trailing_re.sub('', line) for line in lines[::2])
    return ''.join(lines)


_js_quote = {'\\': '\\\\', '"': '\\"', '\b': '\\b', '\f': '\\f',
             '\n': '\\n', '\r': '\\r', '\t': '\\t', "'": "\\'"}
for i in list(xrange(0x20)) + [ord(c) for c in u'&<>\u2028\u2029']:
    _js_quote.setdefault(unichr(i), '\\u%04x' % i)
_js_quote_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t\'&<>' + u'\u2028\u2029]')
_js_string_re = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t&<>' + u'\u2028\u2029]')


def javascript_quote(text):
    """Quote strings for inclusion in single or double quote delimited
    Javascript strings
    """
    if not text:
        return ''
    def replace(match):
        return _js_quote[match.group(0)]
    return _js_quote_re.sub(replace, text)


def to_js_string(text):
    """Embed the given string in a double quote delimited Javascript string
    (conform to the JSON spec)
    """
    if not text:
        return '""'
    def replace(match):
        return _js_quote[match.group(0)]
    return '"%s"' % _js_string_re.sub(replace, text)
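
# Example of the two JavaScript quoting helpers above (Python 2 literals;
# note that to_js_string() leaves single quotes alone, since JSON only
# delimits strings with double quotes):
#
#   javascript_quote(u'say "hi"\n')   # -> u'say \\"hi\\"\\n'
#   to_js_string(u'say "hi"\n')       # -> u'"say \\"hi\\"\\n"'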


def unicode_quote(value, safe='/'):
    """A unicode aware version of `urllib.quote`

    :param value: anything that converts to a `str`. If `unicode`
                  input is given, it will be UTF-8 encoded.
    :param safe: as in `quote`, the characters that would otherwise be
                 quoted but shouldn't here (defaults to '/')
    """
    return quote(value.encode('utf-8') if isinstance(value, unicode)
                 else str(value), safe)


def unicode_quote_plus(value, safe=''):
    """A unicode aware version of `urllib.quote_plus`.

    :param value: anything that converts to a `str`. If `unicode`
                  input is given, it will be UTF-8 encoded.
    :param safe: as in `quote_plus`, the characters that would
                 otherwise be quoted but shouldn't here (defaults to
                 the empty string)
    """
    return quote_plus(value.encode('utf-8') if isinstance(value, unicode)
                      else str(value), safe)


def unicode_unquote(value):
    """A unicode aware version of `urllib.unquote`.

    :param value: UTF-8 encoded `str` value (for example, as obtained by
                  `unicode_quote`).
    :rtype: `unicode`
    """
    return unquote(value).decode('utf-8')
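
# Round-trip example for the URL quoting helpers (u'caf\xe9' is UTF-8
# encoded before percent-quoting, and decoded back on unquoting):
#
#   unicode_quote(u'caf\xe9/menu')      # -> 'caf%C3%A9/menu'
#   unicode_unquote('caf%C3%A9/menu')   # -> u'caf\xe9/menu'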


_qs_quote_safe = ''.join(chr(c) for c in xrange(0x21, 0x7f))


def to_utf8(text, charset='latin1'):
    """Convert input to a UTF-8 `str` object.

    If the input is not an `unicode` object, we assume the encoding is
    already UTF-8, ISO Latin-1, or as specified by the optional
    *charset* parameter.
    """
    if isinstance(text, str):
        try:
            u = unicode(text, 'utf-8')
        except UnicodeError:
            try:
                # not valid UTF-8; try the caller-supplied charset
                u = unicode(text, charset)
            except UnicodeError:
                # last resort: latin1 decodes any byte sequence
                u = unicode(text, 'latin1')
        else:
            # the bytes are already valid UTF-8; return them unchanged
            return text
    else:
        u = to_unicode(text)
    return u.encode('utf-8')


class unicode_passwd(unicode):
    """Conceal the actual content of the string when `repr` is called."""
    def __repr__(self):
        # never echo the real value
        return '*******'


def stream_encoding(stream):
    """Return the appropriate encoding for the given stream."""
    encoding = getattr(stream, 'encoding', None)
    # Windows reports 'cp0' when no codepage is set (e.g. redirected output)
    return encoding if encoding not in (None, 'cp0') else 'utf-8'


def console_print(out, *args, **kwargs):
    """Output the given arguments to the console, encoding the output
    as appropriate.

    :param kwargs: ``newline`` controls whether a newline will be appended
                   (defaults to `True`)
    """
    cons_charset = stream_encoding(out)
    out.write(' '.join(to_unicode(a).encode(cons_charset, 'replace')
                       for a in args))
    if kwargs.get('newline', True):
        out.write('\n')


def printout(*args, **kwargs):
    console_print(sys.stdout, *args, **kwargs)


def printerr(*args, **kwargs):
    console_print(sys.stderr, *args, **kwargs)


def printfout(message, *args, **kwargs):
    """Format `message`, `console_print` it on `sys.stdout` and flush
    the buffer.
    """
    if args:
        message %= args
    printout(message, **kwargs)
    sys.stdout.flush()


def printferr(message, *args, **kwargs):
    """Format `message`, `console_print` it on `sys.stderr` and flush
    the buffer.
    """
    if args:
        message %= args
    printerr(message, **kwargs)
    sys.stderr.flush()


_preferredencoding = locale.getpreferredencoding()

def getpreferredencoding():
    """Return the user's preferred encoding, retrieved once at module
    load time.

    We should use this instead of `locale.getpreferredencoding()`, which
    is not thread-safe."""
    return _preferredencoding


def text_width(text, ambiwidth=1):
    """Determine the column width of `text` in Unicode characters.

    Characters in the East Asian Fullwidth (F) or East Asian Wide (W)
    categories have a column width of 2; characters in the East Asian
    Halfwidth (H) or East Asian Narrow (Na) categories have a column
    width of 1.

    The `ambiwidth` parameter controls the column width of East Asian
    Ambiguous (A) characters. If `1`, they get the same width as US-ASCII
    characters, which is what most users expect. If `2`, they get twice
    the width of US-ASCII characters, as expected by CJK users.

    cf. http://www.unicode.org/reports/tr11/.
    """
    twice = 'FWA' if ambiwidth == 2 else 'FW'
    return sum([2 if east_asian_width(chr) in twice else 1
                for chr in to_unicode(text)])
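
# Illustration of text_width() (u'\u3042' is HIRAGANA LETTER A, an East
# Asian Wide character; u'\xe9' is LATIN SMALL LETTER E WITH ACUTE, an
# East Asian Ambiguous character):
#
#   text_width(u'abc')                  # -> 3
#   text_width(u'\u3042\u3044')         # -> 4
#   text_width(u'\xe9', ambiwidth=2)    # -> 2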


def _get_default_ambiwidth():
    """Return width of East Asian Ambiguous based on locale environment
    variables or Windows codepage.
    """
    if os.name == 'nt':
        import ctypes
        codepage = ctypes.windll.kernel32.GetConsoleOutputCP()
        if codepage in (932,    # Japanese (Shift-JIS)
                        936,    # Simplified Chinese (GBK)
                        949,    # Korean
                        950):   # Traditional Chinese (Big5)
            return 2
    else:
        for name in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
            value = os.environ.get(name) or ''
            if value:
                if name == 'LANGUAGE' and ':' in value:
                    value = value.split(':')[0]
                return 2 if value.lower().startswith(('zh', 'ja', 'ko')) else 1

    return 1


_default_ambiwidth = _get_default_ambiwidth()


def print_table(data, headers=None, sep=' ', out=None, ambiwidth=None):
    """Print data according to a tabular layout.

    :param data: a sequence of rows; assume all rows are of equal length.
    :param headers: an optional row containing column headers; must be of
                    the same length as each row in `data`.
    :param sep: column separator
    :param out: output file descriptor (`None` means use `sys.stdout`)
    :param ambiwidth: column width of East Asian Ambiguous (A) characters.
                      If `None`, detect the width from the locale settings;
                      otherwise the value is passed on to the `ambiwidth`
                      parameter of `text_width`.
    """
    if out is None:
        out = sys.stdout
    charset = getattr(out, 'encoding', None) or 'utf-8'
    if ambiwidth is None:
        ambiwidth = _default_ambiwidth
    data = list(data)
    if headers:
        data.insert(0, headers)
    elif not data:
        return

    # Helpers: convert every cell to unicode and expand cells containing
    # newlines into multiple physical rows, so each row of `lines` is a
    # single display line.

    def to_text(val):
        if val is None:
            return u''
        return to_unicode(val)

    def tw(text):
        return text_width(text, ambiwidth=ambiwidth)

    def to_lines(data):
        lines = []
        for row in data:
            row = [to_text(cell) for cell in row]
            if any('\n' in cell for cell in row):
                row = [cell.splitlines() for cell in row]
                max_lines = max(len(cell) for cell in row)
                for cell in row:
                    if len(cell) < max_lines:
                        cell += [''] * (max_lines - len(cell))
                lines.extend([cell[idx] for cell in row]
                             for idx in xrange(max_lines))
            else:
                lines.append(row)
        return lines

    data = to_lines(data)

    num_cols = len(data[0])
    col_width = [max(tw(row[idx]) for row in data)
                 for idx in xrange(num_cols)]

    out.write('\n')
    for ridx, row in enumerate(data):
        for cidx, cell in enumerate(row):
            if cidx + 1 == num_cols:
                line = cell  # the last column is not padded
            else:
                if headers and ridx == 0:
                    sp = ' ' * tw(sep)  # headers use plain spaces as separator
                else:
                    sp = sep
                line = u'%-*s%s' % (col_width[cidx] - tw(cell) + len(cell),
                                    cell, sp)
            line = line.encode(charset, 'replace')
            out.write(line)

        out.write('\n')
        if ridx == 0 and headers:
            out.write('-' * (tw(sep) * cidx + sum(col_width)))
            out.write('\n')
    out.write('\n')
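
# Usage sketch: print a small, width-aligned table to stdout (the exact
# spacing depends on the `sep` argument and the terminal encoding):
#
#   print_table([('alice', 5), ('bob', 12)], headers=('user', 'count'))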


def shorten_line(text, maxlen=75):
    """Truncates `text` to a length of at most `maxlen` characters.

    This tries to be (a bit) clever and attempts to find a proper word
    boundary for doing so.
    """
    if len(text or '') <= maxlen:
        return text
    suffix = ' ...'
    maxtextlen = maxlen - len(suffix)
    cut = max(text.rfind(' ', 0, maxtextlen), text.rfind('\n', 0, maxtextlen))
    if cut < 0:
        cut = maxtextlen
    return text[:cut] + suffix
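
# shorten_line() keeps whole words where it can and appends ' ...':
#
#   shorten_line(u'the quick brown fox jumps over the lazy dog', 20)
#       # -> u'the quick brown ...'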


class UnicodeTextWrapper(textwrap.TextWrapper):
    breakable_char_ranges = [
        (0x1100, 0x11FF),   # Hangul Jamo
        (0x2E80, 0x2EFF),   # CJK Radicals Supplement
        (0x3000, 0x303F),   # CJK Symbols and Punctuation
        (0x3040, 0x309F),   # Hiragana
        (0x30A0, 0x30FF),   # Katakana
        (0x3130, 0x318F),   # Hangul Compatibility Jamo
        (0x3190, 0x319F),   # Kanbun
        (0x31C0, 0x31EF),   # CJK Strokes
        (0x3200, 0x32FF),   # Enclosed CJK Letters and Months
        (0x3300, 0x33FF),   # CJK Compatibility
        (0x3400, 0x4DBF),   # CJK Unified Ideographs Extension A
        (0x4E00, 0x9FFF),   # CJK Unified Ideographs
        (0xA960, 0xA97F),   # Hangul Jamo Extended-A
        (0xAC00, 0xD7AF),   # Hangul Syllables
        (0xD7B0, 0xD7FF),   # Hangul Jamo Extended-B
        (0xF900, 0xFAFF),   # CJK Compatibility Ideographs
        (0xFE30, 0xFE4F),   # CJK Compatibility Forms
        (0xFF00, 0xFFEF),   # Halfwidth and Fullwidth Forms
        (0x20000, 0x2FFFF, u'[\uD840-\uD87F][\uDC00-\uDFFF]'),  # Plane 2
        (0x30000, 0x3FFFF, u'[\uD880-\uD8BF][\uDC00-\uDFFF]'),  # Plane 3
    ]

    split_re = None
    breakable_re = None

    @classmethod
    def _init_patterns(cls):
        char_ranges = []
        surrogate_pairs = []
        for val in cls.breakable_char_ranges:
            try:
                high = unichr(val[0])
                low = unichr(val[1])
                char_ranges.append(u'%s-%s' % (high, low))
            except ValueError:
                # unichr() raises on narrow Python builds for characters
                # outside the BMP; use the explicit surrogate-pair pattern
                surrogate_pairs.append(val[2])
        char_ranges = u''.join(char_ranges)
        if surrogate_pairs:
            pattern = u'(?:[%s]|%s)+' % (char_ranges,
                                         u'|'.join(surrogate_pairs))
        else:
            pattern = u'[%s]+' % char_ranges

        cls.split_re = re.compile(
            r'(\s+|' +
            pattern + u'|' +
            r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' +
            r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))',
            re.UNICODE)
        cls.breakable_re = re.compile(r'\A' + pattern, re.UNICODE)

    def __init__(self, cols, replace_whitespace=0, break_long_words=0,
                 initial_indent='', subsequent_indent='', ambiwidth=1):
        textwrap.TextWrapper.__init__(
            self, cols, replace_whitespace=0, break_long_words=0,
            initial_indent=initial_indent,
            subsequent_indent=subsequent_indent)
        self.ambiwidth = ambiwidth
        if self.split_re is None:
            self._init_patterns()

    def _split(self, text):
        chunks = self.split_re.split(to_unicode(text))
        chunks = filter(None, chunks)
        return chunks

    def _text_width(self, text):
        return text_width(text, ambiwidth=self.ambiwidth)

    def _wrap_chunks(self, chunks):
        lines = []
        chunks.reverse()
        text_width = self._text_width

        while chunks:
            cur_line = []
            cur_width = 0

            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            width = self.width - text_width(indent)

            if chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk = chunks[-1]
                w = text_width(chunk)
                if cur_width + w <= width:
                    cur_line.append(chunks.pop())
                    cur_width += w
                elif self.breakable_re.match(chunk):
                    # a run of CJK characters may be broken anywhere: fit as
                    # many characters as the remaining space allows
                    left_space = width - cur_width
                    for i in xrange(len(chunk)):
                        w = text_width(chunk[i])
                        if left_space < w:
                            break
                        left_space -= w
                    if i > 0:
                        cur_line.append(chunk[:i])
                        chunk = chunk[i:]
                        chunks[-1] = chunk
                        w = text_width(chunk)
                    break
                else:
                    break

            if chunks and w > width:
                self._handle_long_word(chunks, cur_line, cur_width, width)

            if cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines


def wrap(t, cols=75, initial_indent='', subsequent_indent='',
         linesep=os.linesep, ambiwidth=1):
    """Wraps the single paragraph in `t`, which contains unicode characters.

    Every line is at most `cols` characters long.

    The `ambiwidth` parameter controls the column width of East Asian
    Ambiguous (A) characters. If `1`, they get the same width as US-ASCII
    characters, which is what most users expect. If `2`, they get twice
    the width of US-ASCII characters, as expected by CJK users.
    """
    t = t.strip().replace('\r\n', '\n').replace('\r', '\n')
    wrapper = UnicodeTextWrapper(cols, replace_whitespace=0,
                                 break_long_words=0,
                                 initial_indent=initial_indent,
                                 subsequent_indent=subsequent_indent,
                                 ambiwidth=ambiwidth)
    wrappedLines = []
    for line in t.split('\n'):
        wrappedLines += wrapper.wrap(line.rstrip()) or ['']
    return linesep.join(wrappedLines)
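
# wrap() splits on existing newlines first and then wraps each piece; a
# short paragraph wraps like this (linesep passed explicitly so the result
# does not depend on the platform's os.linesep):
#
#   wrap(u'one two three four', cols=10, linesep='\n')
#       # -> u'one two\nthree four'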


_obfuscation_char = u'@\u2026'


def obfuscate_email_address(address):
    """Replace anything looking like an e-mail address (``'@something'``)
    with a trailing ellipsis (``'@…'``)
    """
    if address:
        at = address.find('@')
        if at != -1:
            return address[:at] + _obfuscation_char + \
                   ('>' if address[-1] == '>' else '')
    return address
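
# Everything after the '@' is dropped, and a trailing '>' is preserved so
# that "Name <user@host>" style addresses stay balanced:
#
#   obfuscate_email_address(u'joe@example.com')     # -> u'joe@\u2026'
#   obfuscate_email_address(u'<joe@example.com>')   # -> u'<joe@\u2026>'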


def is_obfuscated(word):
    """Returns `True` if the `word` looks like an obfuscated e-mail
    address.

    :since: 1.2
    """
    return _obfuscation_char in word


def breakable_path(path):
    """Make a path breakable after path separators, and conversely, avoid
    breaking at spaces.
    """
    if not path:
        return path
    prefix = ''
    if path.startswith('/'):
        prefix = '/'
        path = path[1:]
    return prefix + path.replace('/', u'/\u200b').replace('\\', u'\\\u200b') \
                        .replace(' ', u'\u00a0')
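
# breakable_path() inserts zero-width spaces (U+200B) after separators and
# turns regular spaces into no-break spaces (U+00A0):
#
#   breakable_path(u'/path to/file')   # -> u'/path\xa0to/\u200bfile'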


def normalize_whitespace(text, to_space=u'\xa0', remove=u'\u200b'):
    """Normalize whitespace in a string, by replacing special spaces by normal
    spaces and removing zero-width spaces."""
    if not text:
        return text
    for each in to_space:
        text = text.replace(each, ' ')
    for each in remove:
        text = text.replace(each, '')
    return text


def unquote_label(txt):
    """Remove (one level of) enclosing single or double quotes.

    .. versionadded:: 1.0
    """
    return txt[1:-1] if txt and txt[0] in "'\"" and txt[0] == txt[-1] else txt


def cleandoc(message):
    """Removes uniform indentation and leading/trailing whitespace."""
    from inspect import cleandoc
    return cleandoc(message).strip()


def pretty_size(size, format='%.1f'):
    """Pretty print content size information with appropriate unit.

    :param size: number of bytes
    :param format: can be used to adjust the precision shown
    """
    if size is None:
        return ''

    jump = 1024
    if size < jump:
        from trac.util.translation import ngettext
        return ngettext("%(num)d byte", "%(num)d bytes", num=size)

    units = ['KB', 'MB', 'GB', 'TB']
    i = 0
    while size >= jump and i < len(units):
        i += 1
        size /= 1024.

    return (format + ' %s') % (size, units[i - 1])
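
# pretty_size() divides by 1024 until the value drops below one unit step;
# the `format` argument controls the precision of the printed number:
#
#   pretty_size(1024)           # -> '1.0 KB'
#   pretty_size(1536, '%.2f')   # -> '1.50 KB'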


def expandtabs(s, tabstop=8, ignoring=None):
    """Expand tab characters `'\\\\t'` into spaces.

    :param tabstop: number of space characters per tab
                    (defaults to the canonical 8)

    :param ignoring: if not `None`, the expansion will be "smart" and
                     go from one tabstop to the next. In addition,
                     this parameter lists characters which can be
                     ignored when computing the indent.
    """
    if '\t' not in s:
        return s
    if ignoring is None:
        return s.expandtabs(tabstop)

    outlines = []
    for line in s.split('\n'):
        if '\t' not in line:
            outlines.append(line)
            continue
        p = 0
        s = []
        for c in line:
            if c == '\t':
                n = tabstop - p % tabstop
                s.append(' ' * n)
                p += n
            elif not ignoring or c not in ignoring:
                p += 1
                s.append(c)
            else:
                s.append(c)
        outlines.append(''.join(s))
    return '\n'.join(outlines)
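
# With `ignoring` set, the listed characters occupy no column when the tab
# stops are computed; e.g. with the default tabstop of 8:
#
#   expandtabs('a\tb')                    # -> 'a' + 7 spaces + 'b'
#   expandtabs('>\ta\tb', ignoring='>')   # -> '>', 8 spaces, 'a', 7 spaces, 'b'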


def fix_eol(text, eol):
    """Fix end-of-lines in a text."""
    lines = text.splitlines()
    lines.append('')
    return eol.join(lines)


def unicode_to_base64(text, strip_newlines=True):
    """Safe conversion of ``text`` to base64 representation using
    utf-8 bytes.

    Strips newlines from output unless ``strip_newlines`` is `False`.
    """
    text = to_unicode(text)
    if strip_newlines:
        return text.encode('utf-8').encode('base64').replace('\n', '')
    return text.encode('utf-8').encode('base64')


def unicode_from_base64(text):
    """Safe conversion of ``text`` to unicode based on utf-8 bytes."""
    return text.decode('base64').decode('utf-8')
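
# Round-trip example for the base64 helpers (u'caf\xe9' encodes to the
# UTF-8 bytes 'caf\xc3\xa9' before the base64 step):
#
#   unicode_to_base64(u'caf\xe9')    # -> 'Y2Fmw6k='
#   unicode_from_base64('Y2Fmw6k=')  # -> u'caf\xe9'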


def levenshtein_distance(lhs, rhs):
    """Return the Levenshtein distance between two strings."""
    if len(lhs) > len(rhs):
        rhs, lhs = lhs, rhs
    if not lhs:
        return len(rhs)

    prev = xrange(len(rhs) + 1)
    for lidx, lch in enumerate(lhs):
        curr = [lidx + 1]
        for ridx, rch in enumerate(rhs):
            cost = (lch != rch) * 2
            curr.append(min(prev[ridx + 1] + 1,
                            curr[ridx] + 1,
                            prev[ridx] + cost))
        prev = curr
    return prev[-1]
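
# Note that a substitution costs 2 in this implementation (the same as one
# deletion plus one insertion), while insertions and deletions cost 1:
#
#   levenshtein_distance('abc', 'ab')    # -> 1  (one deletion)
#   levenshtein_distance('abc', 'abd')   # -> 2  (one substitution)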


sub_vars_re = re.compile("[$]([A-Z_][A-Z0-9_]*)")

def sub_vars(text, args):
    """Substitute $XYZ-style variables in a string with provided values.

    :param text: string containing variables to substitute.
    :param args: dictionary with keys matching the variables to be substituted.
                 The keys should not be prefixed with the $ character."""
    def repl(match):
        key = match.group(1)
        return args[key] if key in args else '$' + key
    return sub_vars_re.sub(repl, text)
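
# Unknown variables are left untouched rather than raising KeyError:
#
#   sub_vars(u'Hello $USER, see $TICKET', {'USER': u'joe'})
#       # -> u'Hello joe, see $TICKET'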