Edgewall Software

Ticket #2296: wikilatex.3.py

File wikilatex.3.py, 22.1 KB (added by Trent Apted <tapted@…>, 20 years ago)

Fixed some bugs. Tickets, reports, oneliners, revision logs and images are still broken (investigating).

Line 
1import re
2import os
3import urllib
4
5try:
6 from cStringIO import StringIO
7except ImportError:
8 from StringIO import StringIO
9
10from trac import util
11from trac.mimeview import *
12from trac.wiki.api import WikiSystem
13from trac.wiki.formatter import Formatter
14from trac.util import shorten_line, to_unicode
15
16__all__ = ['wiki_to_latex']
17
# Escape table for pretexfilter().  The HTML entities are listed alongside
# the single characters so that one regex pass can handle both; the regex
# below tries the entities first so '&gt;' is never seen as a bare '&'.
_PRETEX_ESCAPES = {
    '\\': '\\textbackslash ',
    '{': '\\{{}',
    '}': '\\}{}',
    '$': '\\${}',
    '&gt;': '\\textgreater{}',
    '&lt;': '\\textless{}',
    '&amp;': '\\myamp{}',
    '&': '\\&{}',
    '%': '\\mypercent{}',
}
_PRETEX_RE = re.compile(r'&gt;|&lt;|&amp;|[\\{}$&%]')


def _pretex_escape(match):
    """Return the LaTeX replacement for one token matched by _PRETEX_RE."""
    return _PRETEX_ESCAPES[match.group(0)]


def pretexfilter(s, escape=True):
    """
    Escape LaTeX sequences in s that do not affect Wiki formatting.

    Backslashes, braces, dollar signs, ampersands, percent signs and the
    HTML entities &gt; / &lt; / &amp; are replaced by LaTeX-safe commands.
    The substitution is done in a single pass, so text produced by one
    replacement is never escaped a second time.

    s      -- the text to escape; a false value (None, '') is returned as is.
    escape -- accepted for signature compatibility with the other filters;
              it is not consulted.

    Returns the escaped string.
    """
    if not s:
        return s
    return _PRETEX_RE.sub(_pretex_escape, s)
28
def gtotex(s, escape=True):
    """
    Escape LaTeX sequences in s that may still be present after processing.

    Favour pretexfilter() where possible, because the LaTeX formatting added
    _during_ processing needs to be kept; this function therefore leaves
    '#' and '_' alone when they directly follow a backslash or '$', and
    leaves '^' alone when it directly follows '$'.

    s      -- the text to escape; a false value (None, '') is returned as is.
    escape -- when false, no escaping is done and s is only passed through
              to_unicode().

    Returns the result of to_unicode() on the (possibly escaped) text.
    """
    if not s:
        return s
    if escape:
        # TeX-ify the quote character: an opening quote is one followed by
        # something other than punctuation/space, everything else closes.
        s = re.sub(r'"([^,.) ])', r"``\1", s).replace('"', "''")
        # There should never be any of these left at this point.
        s = s.replace('<', '\\textless{}').replace('>', '\\textgreater{}')
        s = s.replace('%', '\\mypercent{}')
        s = s.replace('&gt;', '\\textgreater{}').replace('&lt;', '\\textless{}')
        # Negative lookbehind (rather than consuming the previous character)
        # so that runs such as '##' or '__' are escaped completely and the
        # start of the string needs no special case.
        s = re.sub(r'(?<![\\$])#', r'\\#{}', s)
        s = re.sub(r'(?<![\\$])_', r'\\_{}', s)
        s = re.sub(r'(?<!\$)\^', r'\\textasciicircum{}', s)
    return to_unicode(s)
49
def gfilter_to_url(s, escape=True):
    """
    Undo the escapes that a LaTeX url command is able to cope with itself.

    The escaped forms of '_', '%' and '&amp;' are turned back into the
    plain characters '_', '%' and '&'.  A false value (None, '') is
    returned unchanged.  The escape argument is accepted but not consulted.
    """
    if not s:
        return s
    unescapes = (
        ('\\_{}', '_'),
        ('\\%{}', '%'),
        ('\\&{}amp;', '&'),
    )
    for escaped, plain in unescapes:
        s = s.replace(escaped, plain)
    return s
58
def LatexURL(formatter, ns, target, text, escape=True):
    """
    Build the LaTeX for a link with a given namespace, target and link text.

    When the link text equals the target only a url command is produced.
    Otherwise the text is set with anchortext and the URL goes into a
    footnote.  The formatter and escape arguments are accepted but not
    consulted.
    """
    scheme = ""
    if ns:
        scheme = ns + ':'
    if text == target:
        return '\\url{' + scheme + gfilter_to_url(target) + '}'
    label = gtotex(text) or gtotex(target) or "broken"
    address = scheme + gfilter_to_url(target)
    return '\\anchortext{' + label + '}\\footnote{\\url{' + address + '}}'
66
def WikiLatexURL(formatter, ns, target, text, escape=True):
    """
    Build the LaTeX for a wiki link: anchor text plus a section reference.

    The reference points at the label 'sub:' followed by
    totexlabel(target).  When the link text differs from the target, the
    URL is additionally given in a footnote.  The formatter and escape
    arguments are accepted but not consulted.
    """
    section_ref = ' (\\S\\ref{sub:' + totexlabel(target) + '})'
    if text == target:
        return '\\anchortext{' + gtotex(target) + '}' + section_ref
    label = gtotex(text) or gtotex(target) or "broken"
    scheme = ""
    if ns:
        scheme = ns + ':'
    address = scheme + gfilter_to_url(target)
    return ('\\anchortext{' + label + '}' + section_ref
            + '\\footnote{\\url{' + address + '}}')
74
def totexlabel(s, escape=True):
    """
    Turn s into text usable as a LaTeX label for cross references.

    Ampersands, dollar signs, quotes and braces are dropped, spaces and
    tildes become '-', and '#' becomes ':'.  A false value (None, '') is
    returned unchanged.  The escape argument is accepted but not consulted.
    """
    if not s:
        return s
    substitutions = (
        ('&', ''),
        ('$', ''),
        (' ', '-'),
        ('"', ''),
        ("'", ''),
        ('#', ':'),
        ('{', ''),
        ('}', ''),
        ('~', '-'),
    )
    label = s
    for char, replacement in substitutions:
        label = label.replace(char, replacement)
    return label
82
class LatexFormatter(Formatter):
    """
    Wiki formatter that writes a wiki page out as a LaTeX document.

    format() emits a preamble, converts the page line by line using the
    wiki rules of the base Formatter, and closes the document.  The
    _xxx_formatter methods are looked up by name from replace() for each
    matched wiki pattern and return the LaTeX for that match.

    NOTE(review): this class was reconstructed from a listing whose
    indentation had been lost; block nesting follows the syntax and the
    structure of the upstream Formatter and should be confirmed against
    the original file where marked.
    """

    # States of self.beginsection, see check_section():
    # WRITE_SECTION -- the page's section heading is written at the next check
    # DELAY_SECTION -- the next check is skipped (set by the PageOutline macro)
    # DONE_SECTION  -- the section heading has been written
    WRITE_SECTION = 0
    DELAY_SECTION = 1
    DONE_SECTION = 2

    flavor = 'latex'

    def __init__(self, env, page, req=None, absurls=0, db=None, sectionlevel=""):
        """
        Constructor.

        page         -- the wiki page; its name is used for titles and labels.
        sectionlevel -- prefix put in front of 'section'/'subsection' in the
                        emitted sectioning commands; leave it as the empty
                        string to promote the page to section/subsection.
        The remaining arguments are handed to Formatter.__init__().
        """
        Formatter.__init__(self, env, req, absurls, db)
        self.pagename = page.name
        # Also (re)set by preamble(); initialised here so check_section()
        # never meets a missing attribute.
        self.secname = page.name
        self.sublevel = sectionlevel
        self.inmacro = False
        self.beginsection = self.WRITE_SECTION

    def _get_link_resolvers(self):
        """Return the namespace -> link builder mapping used by _make_link()."""
        # WikiSystem(self.env).link_resolvers is deliberately not used here.
        return {'link': LatexURL, 'source': LatexURL, 'wiki': WikiLatexURL,
                'ticket': LatexURL, 'changeset': LatexURL}
    link_resolvers = property(_get_link_resolvers)

    def _get_db(self):
        """Return the database connection, opening one on first use."""
        if not self._db:
            self._db = self.env.get_db_cnx()
        return self._db
    db = property(fget=_get_db)

    def _get_rules(self):
        """Return the wiki rules from self.wiki."""
        return self.wiki.rules
    rules = property(_get_rules)

    def replace(self, fullmatch):
        """
        Substitution callback for re.sub() over one wiki line.

        Finds the first named group that matched and is not a helper
        pattern, and returns the LaTeX for it.  Returns None when no such
        group matched.
        """
        for itype, match in fullmatch.groupdict().items():
            if match and itype not in self.wiki.helper_patterns:
                # Check for preceding escape character '!'
                if match[0] == '!':
                    return gtotex(pretexfilter(match[1:]))
                if match[0:4] == "http":
                    return LatexURL(self, 'http', match, match)
                elif match[0].isalpha():
                    return WikiLatexURL(self, 'wiki', match, match)
                if itype in self.wiki.external_handlers:
                    return self.wiki.external_handlers[itype](self, match, fullmatch)
                else:
                    return getattr(self, '_' + itype + '_formatter')(match, fullmatch)

    def _tickethref_formatter(self, match, fullmatch):
        """
        Render a '#123' ticket reference with its summary in a footnote.

        This is from a patch for Trac 0.8.
        TODO: how to call for 0.9+ (via link resolvers?)
        """
        number = int(match[1:])
        cursor = self.db.cursor()
        # Query parameters are passed as a sequence, not as a bare value.
        cursor.execute('SELECT summary,status FROM ticket WHERE id=%s', (number,))
        row = cursor.fetchone()
        if not row:
            return '\\#%d (missing)' % (number)
        # NOTE(review): self.prefilter is not defined in this class; it is
        # presumably inherited (Trac 0.8) -- confirm it still exists.
        summary = gtotex(self.prefilter(util.shorten_line(row[0])))
        if row[1] == 'new':
            return '\\#%d*\\footnote{%s \\emph{(new)}}' % (number, summary)
        elif row[1] == 'closed':
            return '\\sout{\\#%d}\\footnote{%s \\emph{(closed)}}' % (number, summary)
        else:
            return '\\#%d\\footnote{%s}' % (number, summary)

    def _bolditalic_formatter(self, match, fullmatch):
        """Toggle bold and italic together."""
        #italic = ('\\emph{', '}')
        italic = ('\\bfseries\\itshape ', '')
        italic_open = self.tag_open_p(italic)
        tmp = ''
        if italic_open:
            tmp += italic[1]
            self.close_tag(italic[1])
        tmp += self._bold_formatter(match, fullmatch)
        if not italic_open:
            tmp += italic[0]
            self.open_tag(*italic)
        return tmp

    def _unquote(self, text):
        """Strip one pair of matching single or double quotes around text."""
        if text and text[0] in "'\"" and text[0] == text[-1]:
            return text[1:-1]
        else:
            return text

    def _shref_formatter(self, match, fullmatch):
        """Render a short 'namespace:target' link."""
        ns = fullmatch.group('sns')
        target = self._unquote(fullmatch.group('stgt'))
        return self._make_link(ns, target, match, match)

    def _make_link(self, ns, target, match, label):
        """
        Render a link, choosing the builder from the namespace.

        Known namespaces use link_resolvers; '//' targets and mailto links
        are treated as external; anything else is emitted as escaped text.
        """
        if ns in self.link_resolvers:
            return self.link_resolvers[ns](self, ns, target, label, False)
        elif target.startswith('//') or ns == "mailto":
            return self._make_ext_link(ns + ':' + target, label)
        else:
            return gtotex(match)

    def _make_ext_link(self, url, text, title=''):
        """Render a link given by a full URL; images are only noted as a to-do."""
        same = text == url
        url = gfilter_to_url(url)
        text, title = gtotex(text), gtotex(title)
        if Formatter.img_re.search(url) and self.flavor != 'oneliner':
            return '\\url{%s} (extimage - todo: make figure float with caption %s)' % (
                url, title or text)
        is_local = url.startswith(self._local)
        if same:
            return '\\url{' + url + '}'
        if not is_local:
            return '\\anchortext{%s}\\footnote{\\url{%s}}' % (text, url)
        return '\\anchortext{%s}\\footnote{ext: \\url{%s}}' % (text, url)

    def _make_relative_link(self, url, text):
        """Render a relative link; URLs starting with self._local also get a section reference."""
        same = text == url
        url = gfilter_to_url(url)
        ref = totexlabel(url)
        text = gtotex(text)
        if Formatter.img_re.search(url) and self.flavor != 'oneliner':
            return '\\url{%s} (relimage - todo: make figure float with caption %s)' % (
                url, text)
        is_local = url.startswith(self._local)
        if same:
            return '\\url{' + url + '}'
        if not is_local:
            return '\\anchortext{%s}\\footnote{\\url{%s}}' % (text, url)
        return '\\anchortext{%s}\\footnote{\\url{%s}} or Subsection \\ref{sub:%s}' % (text, url, ref)

    def _bold_formatter(self, match, fullmatch):
        """Toggle bold text."""
        return self.simple_tag_handler('\\textbf{', '}')

    def _italic_formatter(self, match, fullmatch):
        """Toggle italic text."""
        return self.simple_tag_handler('\\textit{', '}')

    def _underline_formatter(self, match, fullmatch):
        """Toggle underlining, unless the markup is escaped with '!'."""
        if match[0] == '!':
            return gtotex(pretexfilter(match[1:]))
        else:
            return self.simple_tag_handler('\\underbar{',
                                           '}')

    def _strike_formatter(self, match, fullmatch):
        """Toggle strike-through, unless the markup is escaped with '!'."""
        if match[0] == '!':
            return gtotex(pretexfilter(match[1:]))
        else:
            return self.simple_tag_handler('\\sout{', '}')

    def _subscript_formatter(self, match, fullmatch):
        """Toggle subscript, unless the markup is escaped with '!'."""
        if match[0] == '!':
            return gtotex(pretexfilter(match[1:]))
        else:
            return self.simple_tag_handler('$_{', '}$')

    def _superscript_formatter(self, match, fullmatch):
        """Toggle superscript, unless the markup is escaped with '!'."""
        if match[0] == '!':
            return gtotex(pretexfilter(match[1:]))
        else:
            return self.simple_tag_handler('$^{', '}$')

    def _inlinecode_formatter(self, match, fullmatch):
        """Render inline code (group 'inline') in typewriter type."""
        return '\\texttt{%s}' % gtotex(fullmatch.group('inline'))

    def _inlinecode2_formatter(self, match, fullmatch):
        """Render inline code (group 'inline2') in typewriter type."""
        return '\\texttt{%s}' % gtotex(fullmatch.group('inline2'))

    def _htmlescape_formatter(self, match, fullmatch):
        """Render '&', '<' or any other matched character (taken as '>')."""
        if match == "&":
            return "\\&{}"
        if match == "<":
            return "\\textless{}"
        return "\\textgreater{}"

    def _macro_formatter(self, match, fullmatch):
        """
        Render a wiki macro.

        BR, TracGuideToc and PageOutline are handled here; every other
        macro is run by the base Formatter and its output is wrapped in a
        verbatim environment.
        """
        name = fullmatch.group('macroname')
        if name in ['br', 'BR']:
            # A line shorter than 7 characters cannot hold more than the
            # 6-character '[[BR]]' itself, so emit vertical space instead
            # of a line break.
            if len(self.current_line) < 7:
                return '\\vspace{1ex}'
            return '\\\\'
        if name in ['TracGuideToc']:
            return ''
        if name in ['PageOutline']:
            if self.beginsection == self.WRITE_SECTION:
                self.beginsection = self.DELAY_SECTION
            # NOTE(review): the listing's indentation was lost; returning
            # the table of contents for every PageOutline is assumed.
            return '\\tableofcontents{}'
        # Tell format() not to escape this line: it contains verbatim output.
        self.inmacro = True
        s = '\\begin{verbatim}\n' + Formatter._macro_formatter(self, match, fullmatch) + '\n\\end{verbatim}\n'
        #self.inmacro = False
        return s

#        args = fullmatch.group('macroargs')
#        try:
#            macro = WikiProcessor(self.env, name)
#            return macro.process(self.req, args, 1)
#        except Exception, e:
#            self.env.log.error('Macro %s(%s) failed' % (name, args),
#                               exc_info=True)
#            return system_message('Error: Macro %s(%s) failed' % (name, args), e)

    def _heading_formatter(self, match, fullmatch):
        """
        Render a wiki heading as a (sub)subsection with an 'anchor:' label.

        Headings deeper than level 1 get one more 'sub' when no section
        level prefix is set; deep headings are starred (unnumbered).
        """
        match = match.strip()
        self.close_table()
        self.close_paragraph()
        self.close_indentation()
        self.close_list()
        self.close_def_list()

        depth = min(len(fullmatch.group('hdepth')), 5)
        heading = match[depth + 1:len(match) - depth - 1]

        star = ''
        moresub = ''

        if depth > 1 and self.sublevel == '':
            moresub = 'sub'
        if (depth > 1 and self.sublevel != '') or depth > 2:
            star = '*'

        text = heading

        # Make the anchor unique within the page by appending a counter.
        i = 1
        anchor = anchor_base = heading.encode('utf-8')
        while anchor in self._anchors:
            anchor = anchor_base + str(i)
            i += 1
        self._anchors.append(anchor)

        command = '\\' + self.sublevel + moresub + 'subsection' + star
        try:
            # TODO: gtotex on the next line should be latex_oneliner
            return command + '{\\label{anchor:%s}%s}' % (totexlabel(anchor), gtotex(text))
        except Exception:
            # Best effort: keep the document compilable when the heading
            # cannot be converted.
            return command + '{Bad Unicode}'

    def _indent_formatter(self, match, fullmatch):
        """Handle leading indentation: continue a list item or open a quote."""
        depth = (len(fullmatch.group('idepth')) + 1) // 2
        list_depth = len(self._list_stack)
        if list_depth > 0 and depth == list_depth + 1:
            self.in_list_item = 1
        else:
            self.open_indentation(depth)
        return ''

    def _last_table_cell_formatter(self, match, fullmatch):
        """The closing '||' of a table row produces no output of its own."""
        return ''

    def _table_cell_formatter(self, match, fullmatch):
        """Open the table and row as needed and separate cells with '&'."""
        self.open_table()
        self.open_table_row()
        if self.in_table_cell:
            return ' & '
        else:
            self.in_table_cell = 1
            return ''

    def close_indentation(self):
        """Close every open quote environment."""
        self.out.write(('\\end{quote}' + os.linesep) * self.indent_level)
        self.indent_level = 0

    def open_indentation(self, depth):
        """Switch to 'depth' nested quote environments (no-op in a definition list)."""
        if self.in_def_list:
            return
        diff = depth - self.indent_level
        if diff != 0:
            self.close_paragraph()
            self.close_indentation()
            self.close_list()
            self.indent_level = depth
            self.out.write(('\\begin{quote}' + os.linesep) * depth)

    def _list_formatter(self, match, fullmatch):
        """Handle a list bullet: '*' starts an itemize item, anything else an enumerate item."""
        ldepth = len(fullmatch.group('ldepth'))
        depth = (ldepth + 1) // 2
        self.in_list_item = depth > 0
        if match[ldepth] == '*':
            type_ = 'ul'
        else:
            type_ = 'ol'
        self._set_list_depth(depth, type_)
        return ''

    def _definition_formatter(self, match, fullmatch):
        """Render the 'term::' part of a definition as a description item."""
        tmp = ''
        if not self.in_def_list:
            tmp += '\\begin{description}\n'

        definition = match[:match.find('::')]
        # startswith() rather than [0]: the term may be empty.
        if definition.startswith(' '):
            definition = definition[1:]
        # TODO: next line should be latex_oneliner
        tmp += '\\item[%s] ' % gtotex(definition)

        self.in_def_list = True
        return tmp

    def close_def_list(self):
        """Close the description environment if one is open."""
        if self.in_def_list:
            self.out.write('\\end{description}\n')
        self.in_def_list = False

    def _hl_to_ll(self, l):
        """Map an HTML-style list type ('ul' or other) to a LaTeX environment name."""
        if l == 'ul':
            return 'itemize'
        else:
            return 'enumerate'

    def _set_list_depth(self, depth, type_):
        """
        Open or close list environments so that 'depth' lists of kind
        type_ are open, and start a new item when depth is positive.
        """
        current_depth = len(self._list_stack)
        diff = depth - current_depth
        self.close_table()
        self.close_paragraph()
        self.close_indentation()
        self.check_section()
        if diff > 0:
            for i in range(diff):
                self._list_stack.append(type_)
                self.out.write('\\begin{' + self._hl_to_ll(type_) + '}\n')
                # NOTE(review): the listing's indentation was lost.  One
                # item per opened level is assumed, as a nested list
                # environment has to sit inside an item.
                self.out.write('\\item ')
        elif diff < 0:
            for i in range(-diff):
                tmp = self._list_stack.pop()
                self.out.write('\\end{' + self._hl_to_ll(tmp) + '}\n')
            if self._list_stack != [] and type_ != self._list_stack[-1]:
                tmp = self._list_stack.pop()
                self._list_stack.append(type_)
                self.out.write('\n\\end{%s}\n\\begin{%s}\n\\item ' % (self._hl_to_ll(tmp), self._hl_to_ll(type_)))
            if depth > 0:
                self.out.write('\\item ')
        # diff == 0
        elif self._list_stack != [] and type_ != self._list_stack[-1]:
            tmp = self._list_stack.pop()
            self._list_stack.append(type_)
            self.out.write('\n\\end{%s}\n\\begin{%s}\n\\item ' % (self._hl_to_ll(tmp), self._hl_to_ll(type_)))
        elif depth > 0:
            self.out.write('\\item ')

    def close_list(self):
        """Close every open list environment."""
        if self._list_stack != []:
            self._set_list_depth(0, None)

    def open_paragraph(self):
        """Start a paragraph (a blank line in LaTeX) unless one is open."""
        if not self.paragraph_open:
            self.out.write(os.linesep)
            self.paragraph_open = 1

    def close_paragraph(self):
        """Close the open paragraph, first closing any tags still open in it."""
        if self.paragraph_open:
            while self._open_tags != []:
                self.out.write(self._open_tags.pop()[1])
            self.out.write(os.linesep)
            self.paragraph_open = 0

    def open_table(self):
        """Start a tabular environment unless one is open."""
        if not self.in_table:
            self.close_paragraph()
            self.close_indentation()
            self.close_list()
            self.close_def_list()
            self.in_table = 1
            # The column specification is fixed at nine left-aligned columns.
            self.out.write('\\begin{tabular}{|l|l|l|l|l|l|l|l|l|} \\hline' + os.linesep)

    def open_table_row(self):
        """Start a table row unless one is open."""
        if not self.in_table_row:
            self.open_table()
            self.in_table_row = 1
            self.out.write(os.linesep)

    def close_table_row(self):
        """End the open table row with a line break and a horizontal rule."""
        if self.in_table_row:
            self.in_table_row = 0
            if self.in_table_cell:
                self.in_table_cell = 0
            self.out.write('~\\\\ \\hline' + os.linesep)

    def close_table(self):
        """Close the open tabular environment, ending its last row first."""
        if self.in_table:
            self.close_table_row()
            self.out.write('\\end{tabular}' + os.linesep)
            self.in_table = 0

    def handle_code_block(self, line):
        """
        Process one line inside (or opening) a '{{{ ... }}}' block.

        The outermost braces become a verbatim environment; nested braces
        and all other lines are copied through unchanged.
        """
        if line.strip() == '{{{':
            self.in_code_block += 1
            if self.in_code_block == 1:
                self.out.write('\\begin{verbatim}' + os.linesep)
            else:
                self.out.write(line + os.linesep)
        elif line.strip() == '}}}':
            self.in_code_block -= 1
            if self.in_code_block == 0:
                self.out.write('\\end{verbatim}' + os.linesep)
            else:
                self.out.write(line + os.linesep)
        else:
            self.out.write(line + os.linesep)

    def preamble(self):
        """Write the LaTeX preamble up to and including the document start."""
        self.secname = self.pagename
        self.out.write('\\documentclass{article}' + os.linesep)
        self.out.write('\\usepackage{url}' + os.linesep)
        self.out.write('\\usepackage{ulem}' + os.linesep)
#        self.out.write("""\def\dotuline{\\bgroup
#          \\ifdim\\ULdepth=\\maxdimen % Set depth based on font, if not set already
#            \\settodepth\\ULdepth{(j}\\advance\\ULdepth.4pt\\fi
#          \\markoverwith{\\begingroup
#          \\advance\\ULdepth0.08ex
#          \\lower\\ULdepth\\hbox{\\kern.15em .\\kern.1em}%
#          \\endgroup}\\ULon}""")
        self.out.write("""\\usepackage[pagebackref, bookmarks, pdfpagelabels,
 citecolor=black, linkcolor=black, anchorcolor=blue, filecolor=black,
 bookmarksopen=true, plainpages=false, colorlinks=true, bookmarksnumbered=true,
 pdftitle={%s},
 pdfauthor={userid}]{hyperref}
""" % self.pagename)
        # Backslashes are doubled explicitly so the literal does not depend
        # on how unrecognised escape sequences are treated.
        self.out.write('\\newcommand{\\anchortext}[1]{\\def\\ULthickness{.2pt}\\underbar{#1}\\def\\ULthickness{.4pt}} %this does not appear to work properly' + os.linesep)
        self.out.write('\\newcommand{\\mypercent}{\\%{}}' + os.linesep)
        self.out.write('\\newcommand{\\myamp}{\\&{}}' + os.linesep)
        # This is more like the HTML, and probably what people expect.
        self.out.write('\\setlength\\parskip{\\medskipamount}' + os.linesep)
        self.out.write('\\setlength\\parindent{0pt}' + os.linesep)

        self.out.write('\\begin{document}' + os.linesep)
        #self.out.write('\\' + self.sublevel + 'section{\\label{sub:' + self.secname + '}' + self.secname + '}' + os.linesep)

    def check_section(self):
        """
        Write the page's own section heading once, before its first content.

        In the WRITE_SECTION state the heading is written and the state
        becomes DONE_SECTION; in the DELAY_SECTION state this call only
        moves the state on to WRITE_SECTION.
        """
        if self.beginsection == self.WRITE_SECTION:
            # The label goes through totexlabel() so that it matches the
            # reference written by WikiLatexURL(); the title is escaped
            # because it is written straight to the output.
            self.out.write('\\' + self.sublevel + 'section{\\label{sub:'
                           + totexlabel(self.secname) + '}'
                           + gtotex(self.secname) + '}' + os.linesep)
            self.beginsection = self.DONE_SECTION
        elif self.beginsection == self.DELAY_SECTION:
            self.beginsection = self.WRITE_SECTION

    def format(self, text, out, escape_newlines=False):
        """
        Convert wiki text to a complete LaTeX document written to out.

        text            -- the wiki source.
        out             -- object with a write() method receiving the LaTeX.
        escape_newlines -- when true, every line gets a '[[BR]]' appended
                           so that line ends become line breaks.
        """
        self.out = out
        self._open_tags = []
        self._list_stack = []

        self.in_code_block = 0
        self.in_table = 0
        self.in_def_list = 0
        self.in_table_row = 0
        self.in_table_cell = 0
        self.indent_level = 0
        self.paragraph_open = 0

        self.preamble()

        for line in text.splitlines():
            self.current_line = line
            # Handle code block
            if self.in_code_block or line.strip() == '{{{':
                self.handle_code_block(line)
                continue
            # Handle Horizontal ruler
            elif line[0:4] == '----':
                self.close_paragraph()
                self.close_indentation()
                self.close_list()
                self.close_def_list()
                self.close_table()
                self.out.write('{\\normalsize \\vspace{1ex} \\hrule width \\columnwidth \\vspace{1ex}}' + os.linesep)
                continue
            # Handle new paragraph
            elif line == '':
                self.close_paragraph()
                self.close_indentation()
                self.close_list()
                self.close_def_list()
                continue

            if escape_newlines:
                line += ' [[BR]]'
            self.in_list_item = False
            # Throw a bunch of regexps on the problem
            try:
                result = re.sub(self.rules, self.replace, line)
            except TypeError:
                # Only the placeholder is emitted: the earlier attempt to
                # append the exception text always failed and was swallowed.
                result = "\\textless Bad Unicode on this line \\textgreater"

            if not self.in_list_item:
                self.close_list()

            if self.in_def_list and not line.startswith(' '):
                self.close_def_list()

            if self.in_table and line[0:2] != '||':
                self.close_table()

            self.check_section()

            if len(result) and not self.in_list_item and not self.in_def_list \
                    and not self.in_table:
                self.open_paragraph()
            try:
                # Lines containing macro output are not escaped again.
                out.write(gtotex(result, not self.inmacro) + os.linesep)
            except Exception:
                # Best effort: keep going with a placeholder for this line.
                out.write("\\textless Bad Unicode \\textgreater" + os.linesep)
            self.close_table_row()
            self.inmacro = False

        if self.in_code_block:
            # The page ended inside '{{{': close the verbatim environment
            # so the document still compiles.
            self.in_code_block = 0
            self.out.write('\\end{verbatim}' + os.linesep)
        self.close_table()
        self.close_paragraph()
        self.close_indentation()
        self.close_list()
        self.close_def_list()
        self.out.write('\\end{document}')
590
def wiki_to_latex(page, env, req, db=None, absurls=0, escape_newlines=False):
    """
    Convert a wiki page to LaTeX.

    Runs a LatexFormatter over page.text, collecting the output in an
    in-memory buffer, and returns the buffer contents wrapped in
    util.Markup.
    """
    buf = StringIO()
    formatter = LatexFormatter(env, page, req, absurls, db)
    formatter.format(page.text, buf, escape_newlines)
    latex = buf.getvalue()
    return util.Markup(latex)