Package tracopt :: Package versioncontrol :: Package git :: Module PyGIT

Source Code for Module tracopt.versioncontrol.git.PyGIT

   1  # -*- coding: utf-8 -*- 
   2  # 
   3  # Copyright (C) 2012-2020 Edgewall Software 
   4  # Copyright (C) 2006-2011, Herbert Valerio Riedel <[email protected]> 
   5  # All rights reserved. 
   6  # 
   7  # This software is licensed as described in the file COPYING, which 
   8  # you should have received as part of this distribution. The terms 
   9  # are also available at https://trac.edgewall.org/wiki/TracLicense. 
  10  # 
  11  # This software consists of voluntary contributions made by many 
  12  # individuals. For the exact contribution history, see the revision 
  13  # history and logs, available at https://trac.edgewall.org/log/. 
  14   
  15  from __future__ import with_statement 
  16   
  17  import os 
  18  import codecs 
  19  from collections import deque 
  20  from contextlib import contextmanager 
  21  import cStringIO 
  22  from functools import partial 
  23  import re 
  24  from subprocess import Popen, PIPE 
  25  import tempfile 
  26  from threading import Lock 
  27  import weakref 
  28   
  29  from trac.util import terminate 
  30  from trac.util.compat import close_fds 
  31  from trac.util.datefmt import time_now 
  32  from trac.util.text import exception_to_unicode, to_unicode 
  33   
  34  __all__ = ['GitError', 'GitErrorSha', 'Storage', 'StorageFactory'] 
class GitError(Exception):
    """Base class of the errors raised by this module."""


class GitErrorSha(GitError):
    """Raised when a revision or object id does not lead to usable data."""
42
# Helper functions

def parse_commit(raw):
    """Parse the raw content of a commit (as given by `git cat-file -p <rev>`).

    Return the commit message and a dict of properties.

    The header section ends at the first blank line; everything after it
    is the message.  Each header line is `<key> <value>`; lines starting
    with a single space continue the value of the most recent header and
    are joined to it with newlines.  Every key maps to the list of its
    values, in order of appearance, so repeated headers (`parent`,
    `mergetag`, ...) are all kept.

    Raise `GitErrorSha` if `raw` is empty or starts with a continuation
    line.  A commit without a blank line after the headers yields an empty
    message.
    """
    if not raw:
        raise GitErrorSha
    lines = raw.splitlines()
    if not lines:
        raise GitErrorSha

    props = {}
    key = None
    message_start = len(lines)  # no blank line found -> no message
    for idx, line in enumerate(lines):
        if not line:
            # blank separator: the message starts on the next line
            message_start = idx + 1
            break
        if line[0] == ' ':
            # Continuation line: it always belongs to the header value that
            # was added last, even when the same key occurred before.
            if key is None:
                raise GitErrorSha
            values = props[key]
            values[-1] = values[-1] + '\n' + line[1:]
            continue
        fields = line.split(None, 1)
        if not fields:
            # whitespace-only line that is not a continuation; ignore it
            continue
        key = fields[0]
        if len(fields) > 1:
            value = fields[1].strip()
        else:
            value = ''  # header without a value
        props.setdefault(key, []).append(value)
    return '\n'.join(lines[message_start:]), props
72 73 74 _unquote_re = re.compile(r'\\(?:[abtnvfr"\\]|[0-7]{3})') 75 _unquote_chars = {'a': '\a', 'b': '\b', 't': '\t', 'n': '\n', 'v': '\v', 76 'f': '\f', 'r': '\r', '"': '"', '\\': '\\'}
77 78 79 -def _unquote(path):
80 if path.startswith('"') and path.endswith('"'): 81 def replace(match): 82 s = match.group(0)[1:] 83 if len(s) == 3: 84 return chr(int(s, 8)) # \ooo 85 return _unquote_chars[s]
86 path = _unquote_re.sub(replace, path[1:-1]) 87 return path 88
89 90 -def _close_proc_pipes(proc):
91 if proc: 92 for f in (proc.stdin, proc.stdout, proc.stderr): 93 if f: 94 f.close()
95
class GitCore(object):
    """Low-level wrapper around git executable

    Apart from the explicitly defined methods, every public attribute is
    turned into a git sub-command on the fly: `core.rev_list('--all')`
    runs `git rev-list --all` and returns what the command wrote to
    stdout.
    """

    def __init__(self, git_dir=None, git_bin='git', log=None,
                 fs_encoding=None):
        """Store the invocation settings; no process is started here.

        `git_dir`: passed to git as `--git-dir=...` when set

        `git_bin`: the executable to run

        `log`: optional logger used to report failing commands

        `fs_encoding`: when not `None`, command line arguments are
                       converted using this encoding
        """
        self.__git_bin = git_bin
        self.__git_dir = git_dir
        self.__log = log
        self.__fs_encoding = fs_encoding

    def __repr__(self):
        return '<GitCore bin="%s" dir="%s">' % (self.__git_bin,
                                                self.__git_dir)

    def __build_git_cmd(self, gitcmd, *args):
        """construct command list for git call suitable for Popen()"""

        cmd = [self.__git_bin]
        if self.__git_dir:
            cmd.append('--git-dir=%s' % self.__git_dir)
        cmd.append(gitcmd)
        cmd.extend(args)

        fs_encoding = self.__fs_encoding
        if fs_encoding is not None:
            if os.name == 'nt':
                # For Windows, Popen() accepts only ANSI encoding
                def to_cmd_encoding(arg):
                    if not isinstance(arg, unicode):
                        arg = arg.decode(fs_encoding, 'replace')
                    return arg.encode('mbcs', 'replace')
            else:
                def to_cmd_encoding(arg):
                    if isinstance(arg, unicode):
                        arg = arg.encode(fs_encoding, 'replace')
                    return arg
            cmd = [to_cmd_encoding(arg) for arg in cmd]
        return cmd

    def __pipe(self, git_cmd, *cmd_args, **kw):
        """start git and return the `Popen` object

        stdin, stdout and stderr are pipes unless overridden through `kw`.
        """
        kw.setdefault('stdin', PIPE)
        kw.setdefault('stdout', PIPE)
        kw.setdefault('stderr', PIPE)
        return Popen(self.__build_git_cmd(git_cmd, *cmd_args),
                     close_fds=close_fds, **kw)

    def __execute(self, git_cmd, *cmd_args):
        """execute git command and return the data it wrote to stdout

        A non-zero exit status or output on stderr is only reported to the
        logger (if any); it does not raise.
        """
        p = self.__pipe(git_cmd, stdout=PIPE, stderr=PIPE, *cmd_args)
        stdout_data, stderr_data = p.communicate()
        _close_proc_pipes(p)
        if self.__log and (p.returncode != 0 or stderr_data):
            self.__log.debug('%s exits with %d, dir: %r, args: %s %r, '
                             'stderr: %r', self.__git_bin, p.returncode,
                             self.__git_dir, git_cmd, cmd_args, stderr_data)

        return stdout_data

    def cat_file_batch(self):
        """start `git cat-file --batch` and return its `Popen` object"""
        return self.__pipe('cat-file', '--batch', stdin=PIPE, stdout=PIPE)

    def log_pipe(self, *cmd_args):
        """start `git log` with `cmd_args` and return its `Popen` object"""
        return self.__pipe('log', stdout=PIPE, *cmd_args)

    def __getattr__(self, name):
        """map unknown public attributes to git sub-commands

        `name` has its underscores replaced by dashes; the returned
        callable forwards its arguments to that git command.
        """
        if name[:1] == '_' or name in ('cat_file_batch', 'log_pipe'):
            raise AttributeError(name)
        return partial(self.__execute, name.replace('_', '-'))

    # \Z instead of $: `$` would also accept a trailing newline
    __is_sha_pat = re.compile(r'[0-9A-Fa-f]*\Z')

    @classmethod
    def is_sha(cls, sha):
        """returns whether sha is a potential sha id
        (i.e. proper hexstring between 4 and 40 characters)
        """

        # quick test before starting up regexp matcher
        if not (4 <= len(sha) <= 40):
            return False

        return bool(cls.__is_sha_pat.match(sha))
182
class SizedDict(dict):
    """Size-bounded dictionary with FIFO replacement strategy

    Only item assignment (`d[key] = value`) takes part in the bookkeeping:
    once more than `max_size` keys are stored, the keys that were inserted
    first are dropped again.
    """

    def __init__(self, max_size=0):
        dict.__init__(self)
        self.__max_size = max_size
        self.__key_fifo = deque()  # keys in insertion order
        self.__lock = Lock()

    def __setitem__(self, name, value):
        self.__lock.acquire()
        try:
            fifo = self.__key_fifo
            assert len(self) == len(fifo)  # invariant

            if name not in self:
                fifo.append(name)
            dict.__setitem__(self, name, value)

            # evict the oldest keys until the bound holds again
            while len(fifo) > self.__max_size:
                del self[fifo.popleft()]

            assert len(self) == len(fifo)  # invariant
        finally:
            self.__lock.release()

    def setdefault(self, *_):
        raise NotImplementedError("SizedDict has no setdefault() method")
211
class StorageFactory(object):
    """Hand out one shared `Storage` instance per repository path.

    Instances are tracked through weak references; requesting one with
    `weak=False` additionally pins it with a strong reference until the
    same repository is requested with `weak=True` again.
    """

    __dict = weakref.WeakValueDictionary()  # repo -> Storage (weak)
    __dict_nonweak = {}                     # repo -> Storage (pinned)
    __dict_rev_cache = {}                   # repo -> last known rev cache
    __dict_lock = Lock()

    def __init__(self, repo, log, weak=True, git_bin='git',
                 git_fs_encoding=None):
        self.logger = log

        with self.__dict_lock:
            if weak:
                # remove additional reference which is created
                # with non-weak argument
                self.__dict_nonweak.pop(repo, None)

            storage = self.__dict.get(repo)
            if storage is None:
                # seed the new instance with the rev cache stored for repo
                storage = Storage(repo, log, git_bin, git_fs_encoding,
                                  self.__dict_rev_cache.get(repo))
                self.__dict[repo] = storage

            # create additional reference depending on 'weak' argument
            if not weak:
                self.__dict_nonweak[repo] = storage

        self.__inst = storage
        if weak:
            kind = 'weak'
        else:
            kind = 'non-weak'
        self.logger.debug("requested %s PyGIT.Storage instance for '%s'",
                          kind, repo)

    def getInstance(self):
        """Return the `Storage` instance selected by the constructor."""
        return self.__inst

    @classmethod
    def set_rev_cache(cls, repo, rev_cache):
        """Remember `rev_cache` for Storage instances created later."""
        with cls.__dict_lock:
            cls.__dict_rev_cache[repo] = rev_cache

    @classmethod
    def _clean(cls):
        """For testing purpose only"""
        with cls.__dict_lock:
            for registry in (cls.__dict, cls.__dict_nonweak,
                             cls.__dict_rev_cache):
                registry.clear()
261
class Storage(object):
    """High-level wrapper around GitCore with in-memory caching"""

    __SREV_MIN = 4 # minimum short-rev length

    class RevCache(object):
        """Snapshot of the revision data derived from a repository.

        `youngest_rev` / `oldest_rev`: sha ids, or `None` when empty

        `rev_dict`: commit database keyed by sha

        `refs_dict`: refname -> sha; the `HEAD` entry holds a refname

        `srev_dict`: lookup table used to expand abbreviated sha ids

        Either all fields are filled or all of them are empty; anything in
        between is rejected with `ValueError`.
        """

        __slots__ = ('youngest_rev', 'oldest_rev', 'rev_dict', 'refs_dict',
                     'srev_dict')

        def __init__(self, youngest_rev, oldest_rev, rev_dict, refs_dict,
                     srev_dict):
            self.youngest_rev = youngest_rev
            self.oldest_rev = oldest_rev
            self.rev_dict = rev_dict
            self.refs_dict = refs_dict
            self.srev_dict = srev_dict
            if youngest_rev is not None and oldest_rev is not None and \
                    rev_dict and refs_dict and srev_dict:
                pass # all fields are not empty
            elif not youngest_rev and not oldest_rev and \
                    not rev_dict and not refs_dict and not srev_dict:
                pass # all fields are empty
            else:
                raise ValueError('Invalid RevCache fields: %r' % self)

        @classmethod
        def empty(cls):
            """Return a cache without any revisions or refs."""
            return cls(None, None, {}, {}, {})

        def __repr__(self):
            def entries(container):
                # The ValueError raised by __init__ formats `self`, so this
                # must not fail for fields without a length (e.g. None).
                try:
                    return len(container)
                except TypeError:
                    return 0

            return 'RevCache(youngest_rev=%r, oldest_rev=%r, ' \
                   'rev_dict=%d entries, refs_dict=%d entries, ' \
                   'srev_dict=%d entries)' % \
                   (self.youngest_rev, self.oldest_rev,
                    entries(self.rev_dict), entries(self.refs_dict),
                    entries(self.srev_dict))

        def iter_branches(self):
            """Yield `(name, sha, is_head)` for each `refs/heads/` ref.

            `is_head` is true for the ref the `HEAD` entry points to.
            """
            head = self.refs_dict.get('HEAD')
            for refname, rev in self.refs_dict.items():
                if refname.startswith('refs/heads/'):
                    yield refname[11:], rev, refname == head

        def iter_tags(self):
            """Yield `(name, sha)` for each `refs/tags/` ref."""
            for refname, rev in self.refs_dict.items():
                if refname.startswith('refs/tags/'):
                    yield refname[10:], rev
310 311 @staticmethod
312 - def __rev_key(rev):
313 assert len(rev) >= 4 314 #assert GitCore.is_sha(rev) 315 srev_key = int(rev[:4], 16) 316 assert srev_key >= 0 and srev_key <= 0xffff 317 return srev_key
318 319 @staticmethod
320 - def git_version(git_bin='git'):
321 GIT_VERSION_MIN_REQUIRED = (1, 5, 6) 322 try: 323 g = GitCore(git_bin=git_bin) 324 [v] = g.version().splitlines() 325 version = v.strip().split()[2] 326 # 'version' has usually at least 3 numeric version 327 # components, e.g.:: 328 # 1.5.4.2 329 # 1.5.4.3.230.g2db511 330 # 1.5.4.GIT 331 332 def try_int(s): 333 try: 334 return int(s) 335 except ValueError: 336 return s
337 338 split_version = tuple(map(try_int, version.split('.'))) 339 340 result = {} 341 result['v_str'] = version 342 result['v_tuple'] = split_version 343 result['v_min_tuple'] = GIT_VERSION_MIN_REQUIRED 344 result['v_min_str'] = ".".join(map(str, GIT_VERSION_MIN_REQUIRED)) 345 result['v_compatible'] = split_version >= GIT_VERSION_MIN_REQUIRED 346 return result 347 348 except Exception, e: 349 raise GitError("Could not retrieve GIT version (tried to " 350 "execute/parse '%s --version' but got %s)" 351 % (git_bin, repr(e)))
352
353 - def __init__(self, git_dir, log, git_bin='git', git_fs_encoding=None, 354 rev_cache=None):
355 """Initialize PyGit.Storage instance 356 357 `git_dir`: path to .git folder; 358 this setting is not affected by the `git_fs_encoding` setting 359 360 `log`: logger instance 361 362 `git_bin`: path to executable 363 this setting is not affected by the `git_fs_encoding` setting 364 365 `git_fs_encoding`: encoding used for paths stored in git repository; 366 if `None`, no implicit decoding/encoding to/from 367 unicode objects is performed, and bytestrings are 368 returned instead 369 """ 370 371 self.logger = log 372 373 self.commit_encoding = None 374 375 # caches 376 self.__rev_cache = rev_cache or self.RevCache.empty() 377 self.__rev_cache_refresh = True 378 self.__rev_cache_lock = Lock() 379 380 # cache the last 200 commit messages 381 self.__commit_msg_cache = SizedDict(200) 382 self.__commit_msg_lock = Lock() 383 384 self.__cat_file_pipe = None 385 self.__cat_file_pipe_lock = Lock() 386 387 if git_fs_encoding is not None: 388 # validate encoding name 389 codecs.lookup(git_fs_encoding) 390 391 # setup conversion functions 392 self._fs_to_unicode = lambda s: s.decode(git_fs_encoding, 393 'replace') 394 self._fs_from_unicode = lambda s: s.encode(git_fs_encoding) 395 else: 396 # pass bytestrings as-is w/o any conversion 397 self._fs_to_unicode = self._fs_from_unicode = lambda s: s 398 399 # simple sanity checking 400 try: 401 os.listdir(git_dir) 402 except EnvironmentError, e: 403 self._raise_not_readable(git_dir, e) 404 if not self._control_files_exist(git_dir): 405 dot_git_dir = os.path.join(git_dir, '.git') 406 try: 407 os.listdir(dot_git_dir) 408 except EnvironmentError: 409 missing = True 410 else: 411 if self._control_files_exist(dot_git_dir): 412 missing = False 413 git_dir = dot_git_dir 414 else: 415 missing = True 416 if missing: 417 self.logger.error("GIT control files missing in '%s'", 418 git_dir) 419 raise GitError("GIT control files not found, maybe wrong " 420 "directory?") 421 422 # at least, check that the HEAD file is readable 423 try: 424 
with open(os.path.join(git_dir, 'HEAD'), 'rb'): 425 pass 426 except EnvironmentError, e: 427 self._raise_not_readable(git_dir, e) 428 429 self.repo = GitCore(git_dir, git_bin, log, git_fs_encoding) 430 self.repo_path = git_dir 431 432 self.logger.debug("PyGIT.Storage instance for '%s' is constructed", 433 git_dir)
434
435 - def _cleanup_proc(self, proc):
436 if proc: 437 _close_proc_pipes(proc) 438 terminate(proc) 439 proc.wait()
440
441 - def __del__(self):
442 with self.__cat_file_pipe_lock: 443 self._cleanup_proc(self.__cat_file_pipe)
444 445 # 446 # cache handling 447 # 448
449 - def invalidate_rev_cache(self):
450 with self.__rev_cache_lock: 451 self.__rev_cache_refresh = True
452 453 @property
454 - def rev_cache(self):
455 """Retrieve revision cache 456 457 may rebuild cache on the fly if required 458 459 returns RevCache tuple 460 """ 461 with self.__rev_cache_lock: 462 self._refresh_rev_cache() 463 return self.__rev_cache
464
465 - def _refresh_rev_cache(self, force=False):
466 refreshed = False 467 if force or self.__rev_cache_refresh: 468 self.__rev_cache_refresh = False 469 refs = self._get_refs() 470 if self.__rev_cache.refs_dict != refs: 471 self.logger.debug("Detected changes in git repository " 472 "'%s'", self.repo_path) 473 rev_cache = self._build_rev_cache(refs) 474 self.__rev_cache = rev_cache 475 StorageFactory.set_rev_cache(self.repo_path, rev_cache) 476 refreshed = True 477 else: 478 self.logger.debug("Detected no changes in git repository " 479 "'%s'", self.repo_path) 480 return refreshed
481
482 - def _build_rev_cache(self, refs):
483 self.logger.debug("triggered rebuild of commit tree db for '%s'", 484 self.repo_path) 485 ts0 = time_now() 486 487 new_db = {} # db 488 new_sdb = {} # short_rev db 489 490 # helper for reusing strings 491 revs_seen = {} 492 def _rev_reuse(rev): 493 return revs_seen.setdefault(rev, rev)
494 495 refs = dict((refname, _rev_reuse(rev)) 496 for refname, rev in refs.iteritems()) 497 head_revs = set(rev for refname, rev in refs.iteritems() 498 if refname.startswith('refs/heads/')) 499 rev_list = [map(_rev_reuse, line.split()) 500 for line in self.repo.rev_list('--parents', '--topo-order', 501 '--all').splitlines()] 502 revs_seen = None 503 504 if rev_list: 505 # first rev seen is assumed to be the youngest one 506 youngest = rev_list[0][0] 507 # last rev seen is assumed to be the oldest one 508 oldest = rev_list[-1][0] 509 else: 510 youngest = oldest = None 511 512 rheads_seen = {} 513 def _rheads_reuse(rheads): 514 rheads = frozenset(rheads) 515 return rheads_seen.setdefault(rheads, rheads) 516 517 __rev_key = self.__rev_key 518 for ord_rev, revs in enumerate(rev_list): 519 rev = revs[0] 520 parents = revs[1:] 521 522 # shortrev "hash" map 523 new_sdb.setdefault(__rev_key(rev), []).append(rev) 524 525 # new_db[rev] = (children(rev), parents(rev), 526 # ordinal_id(rev), rheads(rev)) 527 if rev in new_db: 528 # (incomplete) entry was already created by children 529 _children, _parents, _ord_rev, _rheads = new_db[rev] 530 assert _children 531 assert not _parents 532 assert _ord_rev == 0 533 else: # new entry 534 _children = set() 535 _rheads = set() 536 if rev in head_revs: 537 _rheads.add(rev) 538 539 # create/update entry 540 # transform into frozenset and tuple since entry will be final 541 new_db[rev] = (frozenset(_children), tuple(parents), ord_rev + 1, 542 _rheads_reuse(_rheads)) 543 544 # update parents(rev)s 545 for parent in parents: 546 # by default, a dummy ordinal_id is used for the mean-time 547 _children, _parents, _ord_rev, _rheads2 = \ 548 new_db.setdefault(parent, (set(), [], 0, set())) 549 550 # update parent(rev)'s children 551 _children.add(rev) 552 553 # update parent(rev)'s rheads 554 _rheads2.update(_rheads) 555 556 rheads_seen = None 557 558 # convert sdb either to dict or array depending on size 559 tmp = [()] * 
(max(new_sdb.keys()) + 1) if len(new_sdb) > 5000 else {} 560 try: 561 while True: 562 k, v = new_sdb.popitem() 563 tmp[k] = tuple(v) 564 except KeyError: 565 pass 566 assert len(new_sdb) == 0 567 new_sdb = tmp 568 569 rev_cache = self.RevCache(youngest, oldest, new_db, refs, new_sdb) 570 self.logger.debug("rebuilt commit tree db for '%s' with %d entries " 571 "(took %.1f ms)", self.repo_path, len(new_db), 572 1000 * (time_now() - ts0)) 573 return rev_cache 574
575 - def _get_refs(self):
576 refs = {} 577 tags = {} 578 579 for line in self.repo.show_ref('--dereference').splitlines(): 580 if ' ' not in line: 581 continue 582 rev, refname = line.split(' ', 1) 583 if refname.endswith('^{}'): # derefered tag 584 tags[refname[:-3]] = rev 585 else: 586 refs[refname] = rev 587 refs.update(tags.iteritems()) 588 589 if refs: 590 refname = (self.repo.symbolic_ref('-q', 'HEAD') or '').strip() 591 if refname in refs: 592 refs['HEAD'] = refname 593 594 return refs
595
596 - def get_branches(self):
597 """returns list of (local) branches, with active (= HEAD) one being 598 the first item 599 """ 600 branches = sorted(((self._fs_to_unicode(name), rev, head) 601 for name, rev, head 602 in self.rev_cache.iter_branches()), 603 key=lambda (name, rev, head): (not head, name)) 604 return [(name, rev) for name, rev, head in branches]
605
606 - def get_refs(self):
607 for refname, rev in self.rev_cache.refs_dict.iteritems(): 608 if refname != 'HEAD': 609 yield refname, rev
610
611 - def get_commits(self):
612 return self.rev_cache.rev_dict
613
614 - def oldest_rev(self):
615 return self.rev_cache.oldest_rev
616
617 - def youngest_rev(self):
618 return self.rev_cache.youngest_rev
619
620 - def get_branch_contains(self, sha, resolve=False):
621 """return list of reachable head sha ids or (names, sha) pairs if 622 resolve is true 623 624 see also get_branches() 625 """ 626 627 _rev_cache = self.rev_cache 628 629 try: 630 rheads = _rev_cache.rev_dict[sha][3] 631 except KeyError: 632 return [] 633 634 if resolve: 635 return sorted((self._fs_to_unicode(name), rev) 636 for name, rev, head in _rev_cache.iter_branches() 637 if rev in rheads) 638 else: 639 return list(rheads)
640
641 - def history_relative_rev(self, sha, rel_pos):
642 db = self.get_commits() 643 644 if sha not in db: 645 raise GitErrorSha() 646 647 if rel_pos == 0: 648 return sha 649 650 lin_rev = db[sha][2] + rel_pos 651 652 if lin_rev < 1 or lin_rev > len(db): 653 return None 654 655 for k, v in db.iteritems(): 656 if v[2] == lin_rev: 657 return k 658 659 # should never be reached if db is consistent 660 raise GitError("internal inconsistency detected")
661
662 - def hist_next_revision(self, sha):
663 return self.history_relative_rev(sha, -1)
664
665 - def hist_prev_revision(self, sha):
666 return self.history_relative_rev(sha, +1)
667
668 - def get_commit_encoding(self):
669 if self.commit_encoding is None: 670 self.commit_encoding = \ 671 self.repo.config('--get', 'i18n.commitEncoding').strip() or \ 672 'utf-8' 673 674 return self.commit_encoding
675
676 - def head(self):
677 """get current HEAD commit id""" 678 return self.verifyrev('HEAD')
679
680 - def cat_file(self, kind, sha):
681 return self._cat_file_reader(kind, sha).read()
682
683 - def _cat_file_reader(self, kind, sha):
684 with self.__cat_file_pipe_lock: 685 if self.__cat_file_pipe is None: 686 self.__cat_file_pipe = self.repo.cat_file_batch() 687 688 try: 689 self.__cat_file_pipe.stdin.write(sha + '\n') 690 self.__cat_file_pipe.stdin.flush() 691 692 split_stdout_line = self.__cat_file_pipe.stdout.readline() \ 693 .split() 694 if len(split_stdout_line) != 3: 695 raise GitError("internal error (could not split line " 696 "'%s')" % (split_stdout_line,)) 697 698 _sha, _type, _size = split_stdout_line 699 700 if _type != kind: 701 raise GitError("internal error (got unexpected object " 702 "kind '%s', expected '%s')" 703 % (_type, kind)) 704 705 size = int(_size) 706 707 # stdout.read() can return fewer bytes than requested, 708 # especially if a pipe buffers because the contents are 709 # larger than 64k. 710 stdout_read = self.__cat_file_pipe.stdout.read 711 if size > 32 * 1024 * 1024: 712 buf = tempfile.TemporaryFile() 713 else: 714 buf = cStringIO.StringIO() 715 remaining = size + 1 716 while remaining > 0: 717 chunk = stdout_read(min(remaining, 65536)) 718 if not chunk: 719 # No new data, let's abort 720 raise GitError("internal error (expected to read %d " 721 "bytes, but only got %d)" % 722 (size + 1, size + 1 - remaining)) 723 remaining -= len(chunk) 724 buf.write(chunk if remaining > 0 else chunk[:-1]) 725 726 buf.seek(0) 727 return buf 728 except Exception, e: 729 # There was an error, we should close the pipe to get to a 730 # consistent state (Otherwise it happens that next time we 731 # call cat_file we get payload from previous call) 732 self.logger.warning("closing cat_file pipe: %s", 733 exception_to_unicode(e)) 734 self._cleanup_proc(self.__cat_file_pipe) 735 self.__cat_file_pipe = None
736
737 - def verifyrev(self, rev):
738 """verify/lookup given revision object and return a sha id or None 739 if lookup failed 740 """ 741 rev = self._fs_from_unicode(rev) 742 743 _rev_cache = self.rev_cache 744 745 if GitCore.is_sha(rev): 746 # maybe it's a short or full rev 747 fullrev = self.fullrev(rev) 748 if fullrev: 749 return fullrev 750 751 refs = _rev_cache.refs_dict 752 if rev == 'HEAD': # resolve HEAD 753 refname = refs.get('HEAD') 754 if refname in refs: 755 return refs[refname] 756 resolved = refs.get('refs/heads/' + rev) # resolve branch 757 if resolved: 758 return resolved 759 resolved = refs.get('refs/tags/' + rev) # resolve tag 760 if resolved: 761 return resolved 762 763 # fall back to external git calls 764 rc = self.repo.rev_parse('--verify', rev).strip() 765 if not rc: 766 return None 767 if rc in _rev_cache.rev_dict: 768 return rc 769 770 return None
771
772 - def shortrev(self, rev, min_len=7):
773 """try to shorten sha id""" 774 #try to emulate the following: 775 #return self.repo.rev_parse("--short", str(rev)).strip() 776 rev = str(rev) 777 778 if min_len < self.__SREV_MIN: 779 min_len = self.__SREV_MIN 780 781 _rev_cache = self.rev_cache 782 783 if rev not in _rev_cache.rev_dict: 784 return None 785 786 srev = rev[:min_len] 787 srevs = set(_rev_cache.srev_dict[self.__rev_key(rev)]) 788 789 if len(srevs) == 1: 790 return srev # we already got a unique id 791 792 # find a shortened id for which rev doesn't conflict with 793 # the other ones from srevs 794 crevs = srevs - set([rev]) 795 796 for l in range(min_len+1, 40): 797 srev = rev[:l] 798 if srev not in [ r[:l] for r in crevs ]: 799 return srev 800 801 return rev # worst-case, all except the last character match
802
803 - def fullrev(self, srev):
804 """try to reverse shortrev()""" 805 srev = str(srev) 806 807 _rev_cache = self.rev_cache 808 809 # short-cut 810 if len(srev) == 40 and srev in _rev_cache.rev_dict: 811 return srev 812 813 if not GitCore.is_sha(srev): 814 return None 815 816 try: 817 srevs = _rev_cache.srev_dict[self.__rev_key(srev)] 818 except KeyError: 819 return None 820 821 srevs = filter(lambda s: s.startswith(srev), srevs) 822 if len(srevs) == 1: 823 return srevs[0] 824 825 return None
826
827 - def get_tags(self, rev=None):
828 return sorted(self._fs_to_unicode(name) 829 for name, rev_ in self.rev_cache.iter_tags() 830 if rev is None or rev == rev_)
831
832 - def ls_tree(self, rev, path='', recursive=False):
833 rev = rev and str(rev) or 'HEAD' # paranoia 834 path = self._fs_from_unicode(path).lstrip('/') or '.' 835 tree = self.repo.ls_tree('-zlr' if recursive else '-zl', 836 rev, '--', path).split('\0') 837 838 def split_ls_tree_line(l): 839 """split according to '<mode> <type> <sha> <size>\t<fname>'""" 840 841 meta, fname = l.split('\t', 1) 842 _mode, _type, _sha, _size = meta.split() 843 _mode = int(_mode, 8) 844 _size = None if _size == '-' else int(_size) 845 return _mode, _type, _sha, _size, self._fs_to_unicode(fname)
846 847 return [ split_ls_tree_line(e) for e in tree if e ] 848
849 - def read_commit(self, commit_id):
850 if not commit_id: 851 raise GitError("read_commit called with empty commit_id") 852 853 commit_id, commit_id_orig = self.fullrev(commit_id), commit_id 854 855 db = self.get_commits() 856 if commit_id not in db: 857 self.logger.info("read_commit failed for '%s' ('%s')", 858 commit_id, commit_id_orig) 859 raise GitErrorSha 860 861 with self.__commit_msg_lock: 862 if commit_id in self.__commit_msg_cache: 863 # cache hit 864 result = self.__commit_msg_cache[commit_id] 865 return result[0], dict(result[1]) 866 867 # cache miss 868 raw = self.cat_file('commit', commit_id) 869 raw = unicode(raw, self.get_commit_encoding(), 'replace') 870 result = parse_commit(raw) 871 872 self.__commit_msg_cache[commit_id] = result 873 874 return result[0], dict(result[1])
875
876 - def get_file(self, sha):
877 return self._cat_file_reader('blob', str(sha))
878
879 - def get_obj_size(self, sha):
880 sha = str(sha) 881 882 try: 883 obj_size = int(self.repo.cat_file('-s', sha).strip()) 884 except ValueError: 885 raise GitErrorSha("object '%s' not found" % sha) 886 887 return obj_size
888
889 - def children(self, sha):
890 db = self.get_commits() 891 892 try: 893 return sorted(db[sha][0]) 894 except KeyError: 895 return []
896
897 - def children_recursive(self, sha, rev_dict=None):
898 """Recursively traverse children in breadth-first order""" 899 900 if rev_dict is None: 901 rev_dict = self.get_commits() 902 903 work_list = deque() 904 seen = set() 905 906 _children = rev_dict[sha][0] 907 seen.update(_children) 908 work_list.extend(_children) 909 910 while work_list: 911 p = work_list.popleft() 912 yield p 913 914 _children = rev_dict[p][0] - seen 915 seen.update(_children) 916 work_list.extend(_children) 917 918 assert len(work_list) == 0
919
920 - def parents(self, sha):
921 db = self.get_commits() 922 923 try: 924 return list(db[sha][1]) 925 except KeyError: 926 return []
927
928 - def all_revs(self):
929 return self.get_commits().iterkeys()
930
931 - def sync(self):
932 with self.__rev_cache_lock: 933 return self._refresh_rev_cache(force=True)
    @contextmanager
    def get_historian(self, sha, base_path):
        """Context manager providing a `historian(path)` lookup function.

        `historian(path)` returns the sha of the first commit in the output
        of `git log --name-status <sha> -- <base_path>` that lists `path`
        (or something below it), or `None` once the log output has been
        consumed without finding it.  The log is read lazily, only as far
        as needed to answer the lookups made so far; the `git log` process
        is cleaned up when the context is left.
        """
        p = []          # holds the `git log` process while it is running
        change = {}     # path -> sha of the first commit seen for it
        next_path = []  # holds the path the current lookup is waiting for
        base_path = self._fs_from_unicode(base_path) or '.'

        def name_status_gen():
            # the process is only started on the first cache miss
            p[:] = [self.repo.log_pipe('--pretty=format:%n%H', '--no-renames',
                                       '--name-status', sha, '--', base_path)]
            f = p[0].stdout
            for l in f:
                if l == '\n':
                    continue
                old_sha = l.rstrip('\n')
                # the lines up to the next blank one are '<status>\t<path>'
                for l in f:
                    if l == '\n':
                        break
                    _, path = l.rstrip('\n').split('\t', 1)
                    # git-log without -z option quotes each pathname
                    path = _unquote(path)
                    # record the path and each parent directory that has
                    # not been seen in an earlier commit
                    while path not in change:
                        change[path] = old_sha
                        if next_path == [path]:
                            # answer for the pending historian() call
                            yield old_sha
                        try:
                            path, _ = path.rsplit('/', 1)
                        except ValueError:
                            break
            if p:
                self._cleanup_proc(p[0])
            p[:] = []
            # log output is used up: every further lookup yields None
            while True:
                yield None
        gen = name_status_gen()

        def historian(path):
            path = self._fs_from_unicode(path)
            try:
                return change[path]
            except KeyError:
                # not seen yet: read on until `path` shows up
                next_path[:] = [path]
                return gen.next()

        try:
            yield historian
        finally:
            if p:
                self._cleanup_proc(p[0])
985 - def last_change(self, sha, path, historian=None):
986 if historian is not None: 987 return historian(path) 988 tmp = self.history(sha, path, limit=1) 989 return tmp[0] if tmp else None
990
991 - def history(self, sha, path, limit=None):
992 if limit is None: 993 limit = -1 994 995 args = ['--max-count=%d' % limit, str(sha)] 996 if path: 997 args.extend(('--', self._fs_from_unicode(path))) 998 tmp = self.repo.rev_list(*args) 999 return [rev.strip() for rev in tmp.splitlines()]
1000
1001 - def history_timerange(self, start, stop):
1002 # retrieve start <= committer-time < stop, 1003 # see CachedRepository.get_changesets() 1004 return [ rev.strip() for rev in \ 1005 self.repo.rev_list('--date-order', 1006 '--max-age=%d' % start, 1007 '--min-age=%d' % (stop - 1), 1008 '--all').splitlines() ]
1009
1010 - def rev_is_anchestor_of(self, rev1, rev2):
1011 """return True if rev2 is successor of rev1""" 1012 1013 rev_dict = self.get_commits() 1014 return (rev2 in rev_dict and 1015 rev2 in self.children_recursive(rev1, rev_dict))
1016
1017 - def blame(self, commit_sha, path):
1018 in_metadata = False 1019 1020 path = self._fs_from_unicode(path) 1021 1022 for line in self.repo.blame('-p', '--', path, str(commit_sha)) \ 1023 .splitlines(): 1024 assert line 1025 if in_metadata: 1026 in_metadata = not line.startswith('\t') 1027 else: 1028 split_line = line.split() 1029 if len(split_line) == 4: 1030 (sha, orig_lineno, lineno, group_size) = split_line 1031 else: 1032 (sha, orig_lineno, lineno) = split_line 1033 1034 assert len(sha) == 40 1035 yield (sha, lineno) 1036 in_metadata = True 1037 1038 assert not in_metadata
1039
1040 - def diff_tree(self, tree1, tree2, path='', find_renames=False):
1041 """calls `git diff-tree` and returns tuples of the kind 1042 (mode1,mode2,obj1,obj2,action,path1,path2)""" 1043 1044 # diff-tree returns records with the following structure: 1045 # :<old-mode> <new-mode> <old-sha> <new-sha> <change> NUL <old-path> NUL [ <new-path> NUL ] 1046 1047 path = self._fs_from_unicode(path).strip('/') or '.' 1048 diff_tree_args = ['-z', '-r'] 1049 if find_renames: 1050 diff_tree_args.append('-M') 1051 diff_tree_args.extend([str(tree1) if tree1 else '--root', 1052 str(tree2), '--', path]) 1053 result = self.repo.diff_tree(*diff_tree_args) 1054 if not result: 1055 return 1056 1057 def iter_entry(result): 1058 start = 0 1059 while True: 1060 idx = result.find('\0', start) 1061 if idx == -1: 1062 return 1063 yield result[start:idx] 1064 start = idx + 1
1065 1066 iterate = iter_entry(result) 1067 1068 def next_entry(): 1069 return iterate.next() 1070 1071 if not tree1: 1072 # if only one tree-sha is given on commandline, 1073 # the first line is just the redundant tree-sha itself... 1074 entry = next_entry() 1075 assert not entry.startswith(':') 1076 1077 while True: 1078 try: 1079 entry = next_entry() 1080 except StopIteration: 1081 return 1082 assert entry.startswith(':') 1083 values = entry[1:].split(' ') 1084 assert len(values) == 5 1085 old_mode, new_mode, old_sha, new_sha, change = values 1086 old_mode = int(old_mode, 8) 1087 new_mode = int(new_mode, 8) 1088 change = change[:1] 1089 old_path = self._fs_to_unicode(next_entry()) 1090 new_path = None 1091 if change in ('R', 'C'): # renamed or copied 1092 new_path = self._fs_to_unicode(next_entry()) 1093 yield (old_mode, new_mode, old_sha, new_sha, change, old_path, 1094 new_path) 1095
1096 - def _raise_not_readable(self, git_dir, e):
1097 raise GitError("Make sure the Git repository '%s' is readable: %s" 1098 % (git_dir, to_unicode(e)))
1099
1100 - def _control_files_exist(self, git_dir):
1101 for name in ('HEAD', 'objects', 'refs'): 1102 if not os.path.exists(os.path.join(git_dir, name)): 1103 self.logger.debug("Missing Git control file '%s' in '%s'", 1104 name, git_dir) 1105 return False 1106 return True
1107