| 1 | #! /usr/bin/env python
|
|---|
| 2 |
|
|---|
| 3 | """Mirror a remote ftp subtree into a local directory tree.
|
|---|
| 4 |
|
|---|
| 5 | usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
|
|---|
| 6 | [-l username [-p passwd [-a account]]]
|
|---|
| 7 | hostname[:port] [remotedir [localdir]]
|
|---|
| 8 | -v: verbose
|
|---|
| 9 | -q: quiet
|
|---|
| 10 | -i: interactive mode
|
|---|
| 11 | -m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
|
|---|
| 12 | -n: don't log in
|
|---|
| 13 | -r: remove local files/directories no longer pertinent
|
|---|
| 14 | -l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
|
|---|
| 15 | -s pat: skip files matching pattern
|
|---|
| 16 | hostname: remote host w/ optional port separated by ':'
|
|---|
| 17 | remotedir: remote directory (default initial)
|
|---|
| 18 | localdir: local directory (default current)
|
|---|
| 19 | """
|
|---|
| 20 |
|
|---|
| 21 | import os
|
|---|
| 22 | import sys
|
|---|
| 23 | import time
|
|---|
| 24 | import getopt
|
|---|
| 25 | import ftplib
|
|---|
| 26 | import netrc
|
|---|
| 27 | from fnmatch import fnmatch
|
|---|
| 28 |
|
|---|
| 29 | # Print usage message and exit
|
|---|
def usage(*args):
    """Report a command-line error and terminate the program.

    Each positional argument is written on its own line to standard
    error, followed by the module docstring (the usage text).  The
    function never returns: it raises SystemExit with status 2.
    """
    # Write to stderr directly rather than rebinding sys.stdout, so a
    # caller that intercepts SystemExit keeps a working standard output.
    for msg in args:
        sys.stderr.write('%s\n' % (msg,))
    sys.stderr.write('%s\n' % (__doc__,))
    sys.exit(2)
|
|---|
| 35 |
|
|---|
# Run-time settings.  main() overwrites them from the command line and
# the mirroring code reads them as module globals.
verbose = 1         # message level: -q sets 0, every -v adds one
interactive = 0     # -i: ask before each file and subdirectory
mac = 0             # -m: parse the listing as bare names, '/' marks a dir
rmok = 0            # -r: delete local entries that are no longer pertinent
nologin = 0         # -n (also set by -m): skip the login step
# fnmatch patterns for names that are never mirrored; -s and -m add more.
skippats = ['.', '..', '.mirrorinfo']
|
|---|
| 42 |
|
|---|
| 43 | # Main program: parse command line and start processing
|
|---|
def main():
    """Parse the command line, connect to the server and mirror the tree.

    Login name, account and password are first taken from the user's
    netrc file when it has an entry for the host; -l, -p and -a override
    them.  A malformed command line (unknown option, missing host, bad
    port, too many arguments) ends the program through usage().
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        # NOTE: 'b' is accepted by the option string but no branch below
        # acts on it.
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error as msg:
        usage(msg)
    login = ''
    passwd = ''
    account = ''
    if not args: usage('hostname missing')
    host = args[0]
    port = 0
    if ':' in host:
        host, port = host.split(':', 1)
        try:
            port = int(port)
        except ValueError:
            # Report a non-numeric port as a usage error, not a traceback.
            usage('invalid port number %r' % (port,))
    try:
        auth = netrc.netrc().authenticators(host)
        if auth is not None:
            login, account, passwd = auth
    except (netrc.NetrcParseError, IOError):
        # A missing or unparsable netrc file just means "no stored login".
        pass
    for o, a in opts:
        if o == '-l': login = a
        if o == '-p': passwd = a
        if o == '-a': account = a
        if o == '-v': verbose = verbose + 1
        if o == '-q': verbose = 0
        if o == '-i': interactive = 1
        if o == '-m': mac = 1; nologin = 1; skippats.append('*.o')
        if o == '-n': nologin = 1
        if o == '-r': rmok = 1
        if o == '-s': skippats.append(a)
    remotedir = ''
    localdir = ''
    if args[1:]:
        remotedir = args[1]
        if args[2:]:
            localdir = args[2]
            if args[3:]: usage('too many arguments')
    #
    f = ftplib.FTP()
    if verbose:
        if port:
            portsuffix = ":%d" % port
        else:
            portsuffix = ""
        print("Connecting to '%s%s'..." % (host, portsuffix))
    f.connect(host, port)
    try:
        if not nologin:
            if verbose:
                print('Logging in as %r...' % (login or 'anonymous'))
            f.login(login, passwd, account)
        if verbose: print('OK.')
        pwd = f.pwd()
        if verbose > 1: print('PWD = %r' % (pwd,))
        if remotedir:
            if verbose > 1: print('cwd(%s)' % repr(remotedir))
            f.cwd(remotedir)
            if verbose > 1: print('OK.')
            pwd = f.pwd()
            if verbose > 1: print('PWD = %r' % (pwd,))
        #
        mirrorsubdir(f, localdir)
    finally:
        # Always release the control connection, even when mirroring
        # fails part-way.  Fall back to a plain close if the polite
        # QUIT exchange itself fails.
        try:
            f.quit()
        except ftplib.all_errors:
            f.close()
|
|---|
| 103 |
|
|---|
| 104 | # Core logic: mirror one subdirectory (recursively)
|
|---|
def mirrorsubdir(f, localdir):
    """Mirror the server's current directory into localdir, recursively.

    f is a connected ftplib.FTP object positioned in the remote directory
    to mirror; localdir is the local target ('' means the current
    directory).  The work is done in four passes:

    1. list the remote directory and download every file or symbolic
       link whose listing details differ from those stored in the
       '.mirrorinfo' file of localdir;
    2. drop '.mirrorinfo' entries for names no longer listed remotely;
    3. report (or, with the rmok flag, delete) local entries that are
       neither recorded, hidden, skipped nor a remote subdirectory;
    4. recurse into every remote subdirectory.

    Problems with individual entries are printed and skipped; nothing is
    returned.
    """
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose: print('Creating local directory %r' % (localdir,))
        try:
            makedir(localdir)
        except os.error:
            print("Failed to establish local directory %r" % (localdir,))
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    try:
        infofile = open(infofilename, 'r')
        try:
            text = infofile.read()
        finally:
            infofile.close()
    except IOError:
        text = '{}'
    # NOTE(security): eval() runs whatever the local .mirrorinfo file
    # contains.  The file is normally produced by writedict(), but a
    # tampered file would execute arbitrary code here.
    try:
        info = eval(text)
    except (SyntaxError, NameError):
        print('Bad mirror info in %r' % (infofilename,))
        info = {}
    subdirs = []
    listing = []
    if verbose: print('Listing remote directory %r...' % (pwd,))
    f.retrlines('LIST', listing.append)
    filesfound = []
    for line in listing:
        if verbose > 1: print('--> %r' % (line,))
        # Reset per line so a link target from an earlier line can never
        # be reused by accident.
        linkto = None
        if mac:
            # Mac listing has just filenames;
            # trailing / means subdirectory
            filename = line.strip()
            mode = '-'
            if filename[-1:] == '/':
                filename = filename[:-1]
                mode = 'd'
            infostuff = ''
        else:
            # Parse, assuming a UNIX listing
            words = line.split(None, 8)
            if len(words) < 6:
                if verbose > 1: print('Skipping short line')
                continue
            filename = words[-1].lstrip()
            i = filename.find(" -> ")
            if i >= 0:
                # words[0] had better start with 'l'...
                if verbose > 1:
                    print('Found symbolic link %r' % (filename,))
                linkto = filename[i+4:]
                filename = filename[:i]
            # The four fields before the name identify this version.
            infostuff = words[-5:-1]
            mode = words[0]
        if _skip_pattern_for(filename) is not None:
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print('Remembering subdirectory %r' % (filename,))
            subdirs.append(filename)
            continue
        filesfound.append(filename)
        if filename in info and info[filename] == infostuff:
            if verbose > 1:
                print('Already have this version of %r' % (filename,))
            continue
        fullname = os.path.join(localdir, filename)
        # Build the new copy under a temporary name and rename it into
        # place only once it is complete.
        tempname = os.path.join(localdir, '@'+filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if linkto is None:
                print("Can't create %r: %s"
                      % (tempname, 'no link target in listing'))
                continue
            if verbose:
                print("Creating symlink %r -> %r" % (filename, linkto))
            try:
                os.symlink(linkto, tempname)
            except (IOError, os.error) as msg:
                # os.symlink reports failure through os.error.
                print("Can't create %r: %s" % (tempname, msg))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError as msg:
                print("Can't create %r: %s" % (tempname, msg))
                continue
            if verbose:
                print('Retrieving %r from %r as %r...'
                      % (filename, pwd, fullname))
            if verbose:
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            failed = 0
            t0 = time.time()
            try:
                try:
                    f.retrbinary('RETR ' + filename,
                                 fp1.write, 8*1024)
                except ftplib.error_perm as msg:
                    print(msg)
                    failed = 1
                t1 = time.time()
                nbytes = fp.tell()
            finally:
                # Close the data file even when the transfer raises.
                fp.close()
            if fp1 is not fp:
                fp1.close()
            if failed:
                # The server refused the file: keep whatever local copy
                # exists instead of replacing it with the empty temp file.
                try:
                    os.unlink(tempname)
                except os.error:
                    pass
                # Mark it so the clean-up pass below does not treat an
                # existing local copy as obsolete; the marker never equals
                # real listing details, so the next run retries.
                if filename not in info:
                    info[filename] = 'Not retrieved'
                continue
        try:
            os.unlink(fullname)
        except os.error:
            pass            # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error as msg:
            print("Can't rename %r to %r: %s" % (tempname, fullname, msg))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = nbytes / 1024.0
            summary = '%d Kbytes in %d seconds' % (int(round(kbytes)),
                                                   int(round(dt)))
            if t1 > t0:
                # The rate line has always been followed by a blank line.
                summary = summary + (' (~%d Kbytes/sec)\n'
                                     % int(round(kbytes/dt)))
            print(summary)
    #
    # Remove files from info that are no longer remote
    deletions = 0
    remotenames = set(filesfound)
    for filename in list(info):
        if filename not in remotenames:
            if verbose:
                print("Removing obsolete info entry for %r in %r"
                      % (filename, localdir or "."))
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        names = os.listdir(localdir or os.curdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or name in info or name in subdirs:
            continue
        if _skip_pattern_for(name) is not None:
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print('Local file %r is no longer pertinent' % (fullname,))
            continue
        if verbose: print('Removing local file/dir %r' % (fullname,))
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit: continue
        if verbose: print('Processing subdirectory %r' % (subdir,))
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print('Remote directory now: %r' % (pwd,))
            print('Remote cwd %r' % (subdir,))
        try:
            f.cwd(subdir)
        except ftplib.error_perm as msg:
            print("Can't chdir to %r : %r" % (subdir, msg))
        else:
            if verbose: print('Mirroring as %r' % (localsubdir,))
            mirrorsubdir(f, localsubdir)
            if verbose > 1: print('Remote cwd ..')
            f.cwd('..')
        # Make sure "cd subdir; cd .." really brought us back; otherwise
        # the remaining subdirectories would be mirrored from the wrong
        # place.
        newpwd = f.pwd()
        if newpwd != pwd:
            print('Ended up in wrong directory after cd + cd ..')
            print('Giving up now.')
            break
        else:
            if verbose > 1: print('OK.')


# Helper for mirrorsubdir: test a name against the skip patterns
def _skip_pattern_for(name):
    """Return the first pattern in skippats that matches name, else None."""
    for pat in skippats:
        if fnmatch(name, pat):
            if verbose > 1:
                print('Skip pattern %r matches %r' % (pat, name))
            return pat
    return None
|
|---|
| 309 |
|
|---|
| 310 | # Helper to remove a file or directory tree
|
|---|
def remove(fullname):
    """Delete a file, symbolic link or whole directory tree.

    Returns 1 when fullname is gone afterwards and 0 when anything could
    not be removed; each failure is reported on standard output.  A
    symbolic link to a directory is unlinked, never descended into.
    """
    is_real_dir = os.path.isdir(fullname) and not os.path.islink(fullname)
    if not is_real_dir:
        try:
            os.unlink(fullname)
        except os.error as msg:
            print("Can't remove local file %r: %s" % (fullname, msg))
            return 0
        return 1
    try:
        entries = os.listdir(fullname)
    except os.error:
        entries = []
    # Try every entry, even after one has failed, so as much as possible
    # of the tree is cleaned up.
    outcomes = [remove(os.path.join(fullname, entry)) for entry in entries]
    for outcome in outcomes:
        if not outcome:
            return 0
    try:
        os.rmdir(fullname)
    except os.error as msg:
        print("Can't remove local directory %r: %s" % (fullname, msg))
        return 0
    return 1
|
|---|
| 335 |
|
|---|
| 336 | # Wrapper around a file for writing to write a hash sign every block.
|
|---|
class LoggingFile:
    """Write-through file wrapper that prints a '#' per block written.

    fp is the file object that receives the data, blocksize the number
    of bytes one '#' stands for, and outfp the stream the progress marks
    are written to.
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0          # total number of bytes written so far
        self.hashes = 0         # number of '#' marks already printed
        self.blocksize = blocksize
        self.outfp = outfp

    def write(self, data):
        """Pass data on to the wrapped file, printing any marks now due."""
        self.bytes = self.bytes + len(data)
        # Floor division: only completed blocks earn a mark.  Plain '/'
        # would give a fraction under true division and print an extra
        # mark for a partly filled block.
        hashes = int(self.bytes) // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)

    def close(self):
        """End the progress line; the wrapped file is left open."""
        self.outfp.write('\n')
|
|---|
| 354 |
|
|---|
| 355 | # Ask permission to download a file.
|
|---|
def askabout(filetype, filename, pwd):
    """Ask the user whether an item should be retrieved.

    filetype, filename and pwd are only used to build the prompt.
    Returns 1 for an answer of 'y', 'ye' or 'yes' and 0 for 'n', 'no',
    'nop', 'nope', an empty line or end of input; anything else repeats
    the question.  Answers are case-insensitive.
    """
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    # Pick the line-reading builtin that exists on this interpreter.
    try:
        readreply = raw_input
    except NameError:
        readreply = input
    while 1:
        try:
            reply = readreply(prompt).strip().lower()
        except EOFError:
            # No more input: fall back to the default answer, "no",
            # instead of dying with a traceback.
            return 0
        if reply in ['y', 'ye', 'yes']:
            return 1
        if reply in ['', 'n', 'no', 'nop', 'nope']:
            return 0
        print('Please answer yes or no.')
|
|---|
| 365 |
|
|---|
| 366 | # Create a directory if it doesn't exist. Recursively create the
|
|---|
| 367 | # parent directory as well if needed.
|
|---|
def makedir(pathname):
    """Create directory pathname, creating missing parents first.

    Does nothing when the directory already exists.  Raises os.error
    when a directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    if dirname: makedir(dirname)
    # For a path with a trailing separator ('a/b/') dirname is the same
    # directory, so the recursive call has just created it; creating it
    # again would fail with "file exists".
    if os.path.isdir(pathname):
        return
    os.mkdir(pathname, 0o777)
|
|---|
| 374 |
|
|---|
| 375 | # Write a dictionary to a file in a way that can be read back using
|
|---|
| 376 | # eval() but is still somewhat readable (i.e. not a single long line).
|
|---|
| 377 | # Also creates a backup file.
|
|---|
def writedict(dict, filename):
    """Save dict to filename as an eval()-able literal, one item per line.

    The text is first written to a temporary file ('@' + name) in the
    same directory.  The previous contents of filename, if any, are then
    kept as name + '~' and the temporary file is renamed into place.
    Raises IOError or os.error when the new file cannot be written or
    renamed.
    """
    dirname, fname = os.path.split(filename)
    tempname = os.path.join(dirname, '@' + fname)
    backup = os.path.join(dirname, fname + '~')
    fp = open(tempname, 'w')
    try:
        fp.write('{\n')
        for key, value in dict.items():
            fp.write('%r: %r,\n' % (key, value))
        fp.write('}\n')
    finally:
        # Release the handle even when a write fails part-way.
        fp.close()
    # Only touch the old backup once the new data is safely on disk.
    try:
        os.unlink(backup)
    except os.error:
        pass
    try:
        os.rename(filename, backup)
    except os.error:
        pass                # Nothing to back up on the first write
    os.rename(tempname, filename)
|
|---|
| 397 |
|
|---|
| 398 |
|
|---|
# Start mirroring only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|
|---|