"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format. Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode). Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output. The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together. It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data. You can then use the template to do conversions from many
different sources to many different destinations. The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output. The command must be valid /bin/sh
syntax. If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile is the empty string, standard input is read or
standard output is written, respectively. The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
"""                                     # '
| 60 |
|
|---|
| 61 |
|
|---|
| 62 | import re
|
|---|
| 63 |
|
|---|
| 64 | import os
|
|---|
| 65 | import tempfile
|
|---|
| 66 | import string
|
|---|
| 67 |
|
|---|
| 68 | __all__ = ["Template"]
|
|---|
| 69 |
|
|---|
# Conversion step kinds
#
# Each kind is a two-character string: the first character describes the
# input side of the step and the second the output side ('f' = a real
# file, '-' = standard input/output).

FILEIN_FILEOUT = 'ff'                   # Must read & write real files
STDIN_FILEOUT = '-f'                    # Must write a real file
FILEIN_STDOUT = 'f-'                    # Must read a real file
STDIN_STDOUT = '--'                     # Normal pipeline element
SOURCE = '.-'                           # Must be first, writes stdout
SINK = '-.'                             # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]
| 81 |
|
|---|
| 82 |
|
|---|
class Template:
    """Class representing a pipeline template.

    Attributes:
        steps -- list of (cmd, kind) tuples, in pipeline order
        debugging -- when true, makepipeline() prints the generated
            command text and prefixes it with 'set -x; '
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so the clone can be extended independently.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not in stepkinds, if kind is SOURCE, if the template
        already ends with a SINK step, or if kind requires an input or
        output file and cmd does not mention $IN or $OUT respectively.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not in stepkinds, if kind is SINK, if the template
        already begins with a SOURCE step, or if kind requires an input
        or output file and cmd does not mention $IN or $OUT respectively.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        Raises ValueError if rw is neither 'r' nor 'w'.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) and t.open_w(file) implement
        t.open(file, 'r') and t.open(file, 'w') respectively."""
        if not self.steps:
            # No conversion steps: just open the file directly.
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        # The file is the pipeline's input; its output is the pipe we read.
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w')."""
        if not self.steps:
            # No conversion steps: just open the file directly.
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        # The pipe we write is the pipeline's input; the file is its output.
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile with os.system() and returns os.system()'s result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command text
        for this template's steps; with debugging on, the text is also
        printed and prefixed with 'set -x; '."""
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
| 194 |
|
|---|
| 195 |
|
|---|
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are file names; an empty string leaves that end
    of the pipeline on standard input / standard output.  steps is a
    sequence of (cmd, kind) pairs as stored in Template.steps.

    Temporary files are created (with tempfile.mkstemp) between adjacent
    stages when either side needs a real file.  When any were created,
    the returned text installs a trap that removes them and ends with an
    'rm -f' of the same files.
    """
    # Build a list with for each command:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends
    #
    # A first stage that needs a file but has none gets a 'cat' in front
    # of it, so the data can be collected into a temporary file below.
    first_kind = stages[0][2]
    if first_kind[0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    # Likewise for a last stage that must write a file.
    last_kind = stages[-1][2]
    if last_kind[1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for i in range(1, len(stages)):
        lkind = stages[i-1][2]
        rkind = stages[i][2]
        if lkind[1] == 'f' or rkind[0] == 'f':
            (fd, temp) = tempfile.mkstemp()
            # Only the name is needed; the shell commands do the I/O.
            os.close(fd)
            garbage.append(temp)
            stages[i-1][-1] = stages[i][0] = temp
    #
    # Attach the file names to each command: $IN/$OUT assignments for
    # 'f' ends, shell redirections for '-' ends that have a file name.
    #
    for stage in stages:
        [inf, cmd, kind, outf] = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file name is fed by a
    # pipe from the previous stage; otherwise it starts a new line.
    #
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # The command was prefixed with an assignment above, so
                # group it to keep the whole thing on the pipe.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd
    #
    # Arrange for the temporary files to be removed
    #
    if garbage:
        rmcmd = 'rm -f'
        for temp in garbage:
            rmcmd = rmcmd + ' ' + quote(temp)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist
| 263 |
|
|---|
| 264 |
|
|---|
# Reliably quote a string as a single argument for /bin/sh

# Safe unquoted
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
_funnychars = '"`$\\'                   # Unsafe inside "double quotes"


def quote(file):
    """Return file quoted so /bin/sh reads it as a single argument.

    A string made only of characters in _safechars is returned as is.
    Otherwise it is wrapped in single quotes, or, when it contains a
    single quote itself, in double quotes with every character from
    _funnychars backslash-escaped.  The empty string is returned as ''
    (two single quotes) so the argument does not vanish from the
    command line.
    """
    if not file:
        # An unquoted empty string would not be an argument at all.
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        # Every character is safe: no quoting needed.
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    # Single quotes cannot be escaped inside single quotes, so fall back
    # to double quotes and escape what stays special there.
    res = ''
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        res = res + c
    return '"' + res + '"'
| 284 |
|
|---|
| 285 |
|
|---|
# Small test program and example

def test():
    """Build a four-step template with debugging on and run copy() once.

    The step commands (togif, giftoppm, ppmtogif, fromgif) and the input
    path are examples; presumably they must exist on the machine for the
    pipeline to succeed -- the result of copy() is not checked here.
    """
    print('Testing...')
    t = Template()
    t.append('togif $IN $OUT', 'ff')
    t.append('giftoppm', '--')
    t.append('ppmtogif >$OUT', '-f')
    t.append('fromgif $IN $OUT', 'ff')
    t.debug(1)
    FILE = '/usr/local/images/rgb/rogues/guido.rgb'
    t.copy(FILE, '@temp')
    print('Done.')