Package trac :: Package versioncontrol :: Module cache

Source Code for Module trac.versioncontrol.cache

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright (C) 2005-2020 Edgewall Software 
  4  # Copyright (C) 2005 Christopher Lenz <[email protected]> 
  5  # All rights reserved. 
  6  # 
  7  # This software is licensed as described in the file COPYING, which 
  8  # you should have received as part of this distribution. The terms 
  9  # are also available at https://trac.edgewall.org/wiki/TracLicense. 
 10  # 
 11  # This software consists of voluntary contributions made by many 
 12  # individuals. For the exact contribution history, see the revision 
 13  # history and logs, available at https://trac.edgewall.org/log/. 
 14  # 
 15  # Author: Christopher Lenz <[email protected]> 
 16   
 17  from __future__ import with_statement 
 18   
 19  import os 
 20   
 21  from trac.cache import cached 
 22  from trac.core import TracError 
 23  from trac.util.datefmt import from_utimestamp, to_utimestamp 
 24  from trac.util.translation import _ 
 25  from trac.versioncontrol import Changeset, Node, Repository, NoSuchChangeset 
 26   
 27   
 28  _kindmap = {'D': Node.DIRECTORY, 'F': Node.FILE} 
 29  _actionmap = {'A': Changeset.ADD, 'C': Changeset.COPY, 
 30                'D': Changeset.DELETE, 'E': Changeset.EDIT, 
 31                'M': Changeset.MOVE} 
32 33 -def _invert_dict(d):
34 return dict(zip(d.values(), d.keys()))
# Names of the per-repository rows kept in the `repository` table.
CACHE_REPOSITORY_DIR = 'repository_dir'
CACHE_YOUNGEST_REV = 'youngest_rev'
CACHE_METADATA_KEYS = (CACHE_REPOSITORY_DIR, CACHE_YOUNGEST_REV)

# Reverse lookups (versioncontrol constant -> one-letter database code).
_inverted_kindmap = _invert_dict(_kindmap)
_inverted_actionmap = _invert_dict(_actionmap)
43 44 45 -def _norm_reponame(repos):
46 return repos.reponame or '(default)'
47
48 49 -class CachedRepository(Repository):
50 51 has_linear_changesets = False 52 53 scope = property(lambda self: self.repos.scope) 54
55 - def __init__(self, env, repos, log):
56 self.env = env 57 self.repos = repos 58 self._metadata_id = str(self.repos.id) 59 Repository.__init__(self, repos.name, repos.params, log)
60
61 - def close(self):
62 self.repos.close()
63
64 - def get_base(self):
65 return self.repos.get_base()
66
67 - def get_quickjump_entries(self, rev):
68 return self.repos.get_quickjump_entries(self.normalize_rev(rev))
69
70 - def get_path_url(self, path, rev):
71 return self.repos.get_path_url(path, rev)
72
73 - def get_changeset(self, rev):
74 return CachedChangeset(self, self.normalize_rev(rev), self.env)
75
76 - def get_changeset_uid(self, rev):
77 return self.repos.get_changeset_uid(rev)
78
79 - def get_changesets(self, start, stop):
80 for rev, in self.env.db_query(""" 81 SELECT rev FROM revision 82 WHERE repos=%s AND time >= %s AND time < %s 83 ORDER BY time DESC, rev DESC 84 """, (self.id, to_utimestamp(start), to_utimestamp(stop))): 85 try: 86 yield self.get_changeset(rev) 87 except NoSuchChangeset: 88 pass # skip changesets currently being resync'ed
89
90 - def sync_changeset(self, rev):
91 cset = self.repos.get_changeset(rev) 92 srev = self.db_rev(cset.rev) 93 old_cset = None 94 95 with self.env.db_transaction as db: 96 for time, author, message in db(""" 97 SELECT time, author, message FROM revision 98 WHERE repos=%s AND rev=%s 99 """, (self.id, srev)): 100 old_cset = Changeset(self.repos, cset.rev, message, author, 101 from_utimestamp(time)) 102 if old_cset: 103 db("""UPDATE revision SET time=%s, author=%s, message=%s 104 WHERE repos=%s AND rev=%s 105 """, (to_utimestamp(cset.date), cset.author, 106 cset.message, self.id, srev)) 107 else: 108 self._insert_changeset(db, cset.rev, cset) 109 return old_cset
110 111 @cached('_metadata_id')
112 - def metadata(self):
113 """Retrieve data for the cached `metadata` attribute.""" 114 return dict(self.env.db_query(""" 115 SELECT name, value FROM repository 116 WHERE id=%%s AND name IN (%s) 117 """ % ','.join(['%s'] * len(CACHE_METADATA_KEYS)), 118 (self.id,) + CACHE_METADATA_KEYS))
119
120 - def sync(self, feedback=None, clean=False):
121 if clean: 122 self.remove_cache() 123 124 metadata = self.metadata 125 self.save_metadata(metadata) 126 127 # -- retrieve the youngest revision in the repository and the youngest 128 # revision cached so far 129 self.repos.clear() 130 repos_youngest = self.repos.youngest_rev 131 youngest = metadata.get(CACHE_YOUNGEST_REV) 132 133 # -- verify and normalize youngest revision 134 if youngest: 135 youngest = self.repos.normalize_rev(youngest) 136 if not youngest: 137 self.log.debug("normalize_rev failed (youngest_rev=%r, " 138 "reponame=%s)", 139 self.youngest_rev, _norm_reponame(self)) 140 else: 141 self.log.debug("cache metadata undefined (youngest_rev=%r, " 142 "reponame=%s)", 143 self.youngest_rev, _norm_reponame(self)) 144 youngest = None 145 146 # -- compare them and try to resync if different 147 next_youngest = None 148 if youngest != repos_youngest: 149 self.log.info("repos rev [%s] != cached rev [%s] in '%s'", 150 repos_youngest, youngest, _norm_reponame(self)) 151 if youngest: 152 next_youngest = self.repos.next_rev(youngest) 153 else: 154 try: 155 next_youngest = self.repos.oldest_rev 156 # Ugly hack needed because doing that everytime in 157 # oldest_rev suffers from horrendeous performance (#5213) 158 if self.repos.scope != '/' and not \ 159 self.repos.has_node('/', next_youngest): 160 next_youngest = self.repos.next_rev(next_youngest, 161 find_initial_rev=True) 162 next_youngest = self.repos.normalize_rev(next_youngest) 163 except TracError: 164 # can't normalize oldest_rev: repository was empty 165 return 166 167 if next_youngest is None: # nothing to cache yet 168 return 169 srev = self.db_rev(next_youngest) 170 171 # 0. 
first check if there's no (obvious) resync in progress 172 with self.env.db_query as db: 173 for rev, in db( 174 "SELECT rev FROM revision WHERE repos=%s AND rev=%s", 175 (self.id, srev)): 176 # already there, but in progress, so keep ''previous'' 177 # notion of 'youngest' 178 self.repos.clear(youngest_rev=youngest) 179 return 180 181 # prepare for resyncing (there might still be a race 182 # condition at this point) 183 while next_youngest is not None: 184 srev = self.db_rev(next_youngest) 185 186 with self.env.db_transaction as db: 187 self.log.info("Trying to sync revision [%s] in '%s'", 188 next_youngest, _norm_reponame(self)) 189 cset = self.repos.get_changeset(next_youngest) 190 try: 191 # steps 1. and 2. 192 self._insert_changeset(db, next_youngest, cset) 193 except Exception, e: # *another* 1.1. resync attempt won 194 if isinstance(e, self.env.db_exc.IntegrityError): 195 self.log.warning("Revision %s in '%s' already " 196 "cached: %r", next_youngest, 197 _norm_reponame(self), e) 198 else: 199 self.log.error("Unable to create cache records " 200 "for revision %s in '%s': %r", 201 next_youngest, _norm_reponame(self), 202 e) 203 # the other resync attempts is also 204 # potentially still in progress, so for our 205 # process/thread, keep ''previous'' notion of 206 # 'youngest' 207 self.repos.clear(youngest_rev=youngest) 208 # FIXME: This aborts a containing transaction 209 db.rollback() 210 return 211 212 # 3. update 'youngest_rev' metadata (minimize 213 # possibility of failures at point 0.) 214 db(""" 215 UPDATE repository SET value=%s WHERE id=%s AND name=%s 216 """, (str(next_youngest), self.id, CACHE_YOUNGEST_REV)) 217 del self.metadata 218 219 # 4. iterate (1. should always succeed now) 220 youngest = next_youngest 221 next_youngest = self.repos.next_rev(next_youngest) 222 223 # 5. provide some feedback 224 if feedback: 225 feedback(youngest)
226
227 - def remove_cache(self):
228 """Remove the repository cache.""" 229 self.log.info("Cleaning cache in '%s'", _norm_reponame(self)) 230 with self.env.db_transaction as db: 231 db("DELETE FROM revision WHERE repos=%s", 232 (self.id,)) 233 db("DELETE FROM node_change WHERE repos=%s", 234 (self.id,)) 235 db.executemany("DELETE FROM repository WHERE id=%s AND name=%s", 236 [(self.id, k) for k in CACHE_METADATA_KEYS]) 237 db.executemany(""" 238 INSERT INTO repository (id, name, value) 239 VALUES (%s, %s, %s) 240 """, [(self.id, k, '') for k in CACHE_METADATA_KEYS]) 241 del self.metadata
242
243 - def save_metadata(self, metadata):
244 """Save the repository metadata.""" 245 with self.env.db_transaction as db: 246 invalidate = False 247 248 # -- check that we're populating the cache for the correct 249 # repository 250 repository_dir = metadata.get(CACHE_REPOSITORY_DIR) 251 if repository_dir: 252 # directory part of the repo name can vary on case insensitive 253 # fs 254 if os.path.normcase(repository_dir) \ 255 != os.path.normcase(self.name): 256 self.log.info("'repository_dir' has changed from %r to %r", 257 repository_dir, self.name) 258 raise TracError(_("The repository directory has changed, " 259 "you should resynchronize the " 260 "repository with: trac-admin $ENV " 261 "repository resync '%(reponame)s'", 262 reponame=_norm_reponame(self))) 263 elif repository_dir is None: # 264 self.log.info('Storing initial "repository_dir": %s', 265 self.name) 266 db("""INSERT INTO repository (id, name, value) 267 VALUES (%s, %s, %s) 268 """, (self.id, CACHE_REPOSITORY_DIR, self.name)) 269 invalidate = True 270 else: # 'repository_dir' cleared by a resync 271 self.log.info('Resetting "repository_dir": %s', self.name) 272 db("UPDATE repository SET value=%s WHERE id=%s AND name=%s", 273 (self.name, self.id, CACHE_REPOSITORY_DIR)) 274 invalidate = True 275 276 # -- insert a 'youngeset_rev' for the repository if necessary 277 if CACHE_YOUNGEST_REV not in metadata: 278 db("""INSERT INTO repository (id, name, value) 279 VALUES (%s, %s, %s) 280 """, (self.id, CACHE_YOUNGEST_REV, '')) 281 invalidate = True 282 283 if invalidate: 284 del self.metadata
285
286 - def _insert_changeset(self, db, rev, cset):
287 srev = self.db_rev(rev) 288 # 1. Attempt to resync the 'revision' table. In case of 289 # concurrent syncs, only such insert into the `revision` table 290 # will succeed, the others will fail and raise an exception. 291 db(""" 292 INSERT INTO revision (repos,rev,time,author,message) 293 VALUES (%s,%s,%s,%s,%s) 294 """, (self.id, srev, to_utimestamp(cset.date), 295 cset.author, cset.message)) 296 # 2. now *only* one process was able to get there (i.e. there 297 # *shouldn't* be any race condition here) 298 for path, kind, action, bpath, brev in cset.get_changes(): 299 self.log.debug("Caching node change in [%s] in '%s': %r", 300 rev, _norm_reponame(self.repos), 301 (path, kind, action, bpath, brev)) 302 kind = _inverted_kindmap[kind] 303 action = _inverted_actionmap[action] 304 db(""" 305 INSERT INTO node_change 306 (repos,rev,path,node_type,change_type,base_path, 307 base_rev) 308 VALUES (%s,%s,%s,%s,%s,%s,%s) 309 """, (self.id, srev, path, kind, action, bpath, brev))
310
311 - def get_node(self, path, rev=None):
312 return self.repos.get_node(path, self.normalize_rev(rev))
313
314 - def _get_node_revs(self, path, last=None, first=None):
315 """Return the revisions affecting `path` between `first` and `last` 316 revisions. 317 """ 318 last = self.normalize_rev(last) 319 slast = self.db_rev(last) 320 node = self.get_node(path, last) # Check node existence 321 with self.env.db_query as db: 322 if first is None: 323 first = db(""" 324 SELECT MAX(rev) FROM node_change 325 WHERE repos=%s AND rev<=%s AND path=%s 326 AND change_type IN ('A', 'C', 'M') 327 """, (self.id, slast, path)) 328 first = int(first[0][0]) if first[0][0] is not None else 0 329 sfirst = self.db_rev(first) 330 return [int(rev) for rev, in db(""" 331 SELECT DISTINCT rev FROM node_change 332 WHERE repos=%%s AND rev>=%%s AND rev<=%%s 333 AND (path=%%s OR path %s)""" % db.prefix_match(), 334 (self.id, sfirst, slast, path, 335 db.prefix_match_value(path + '/')))]
336
337 - def _get_changed_revs(self, node_infos):
338 if not node_infos: 339 return {} 340 341 node_infos = [(node, self.normalize_rev(first)) for node, first 342 in node_infos] 343 sfirst = self.db_rev(min(first for node, first in node_infos)) 344 slast = self.db_rev(max(node.rev for node, first in node_infos)) 345 path_infos = dict((node.path, (node, first)) for node, first 346 in node_infos) 347 path_revs = dict((node.path, []) for node, first in node_infos) 348 349 db = self.env.get_read_db() 350 cursor = db.cursor() 351 prefix_match = db.prefix_match() 352 353 # Prevent "too many SQL variables" since max number of parameters is 354 # 999 on SQLite. No limitation on PostgreSQL and MySQL. 355 idx = 0 356 delta = (999 - 3) // 5 357 while idx < len(node_infos): 358 subset = node_infos[idx:idx + delta] 359 idx += delta 360 count = len(subset) 361 362 holders = ','.join(('%s',) * count) 363 query = """\ 364 SELECT DISTINCT 365 rev, (CASE WHEN path IN (%s) THEN path %s END) AS path 366 FROM node_change 367 WHERE repos=%%s AND rev>=%%s AND rev<=%%s AND (path IN (%s) %s) 368 """ % \ 369 (holders, 370 ' '.join(('WHEN path ' + prefix_match + ' THEN %s',) * count), 371 holders, 372 ' '.join(('OR path ' + prefix_match,) * count)) 373 args = [] 374 args.extend(node.path for node, first in subset) 375 for node, first in subset: 376 args.append(db.prefix_match_value(node.path + '/')) 377 args.append(node.path) 378 args.extend((self.id, sfirst, slast)) 379 args.extend(node.path for node, first in subset) 380 args.extend(db.prefix_match_value(node.path + '/') 381 for node, first in subset) 382 cursor.execute(query, args) 383 384 for srev, path in cursor: 385 rev = self.rev_db(srev) 386 node, first = path_infos[path] 387 if first <= rev <= node.rev: 388 path_revs[path].append(rev) 389 390 return path_revs
391
392 - def has_node(self, path, rev=None):
393 return self.repos.has_node(path, self.normalize_rev(rev))
394
395 - def get_oldest_rev(self):
396 return self.repos.oldest_rev
397
398 - def get_youngest_rev(self):
399 return self.rev_db(self.metadata.get(CACHE_YOUNGEST_REV))
400
401 - def previous_rev(self, rev, path=''):
402 # Hitting the repository directly is faster than searching the 403 # database. When there is a long stretch of inactivity on a file (in 404 # particular, when a file is added late in the history) the database 405 # query can take a very long time to determine that there is no 406 # previous revision in the node_changes table. However, the repository 407 # will have a datastructure that will allow it to find the previous 408 # version of a node fairly directly. 409 #if self.has_linear_changesets: 410 # return self._next_prev_rev('<', rev, path) 411 return self.repos.previous_rev(self.normalize_rev(rev), path)
412
413 - def next_rev(self, rev, path=''):
414 if self.has_linear_changesets: 415 return self._next_prev_rev('>', rev, path) 416 else: 417 return self.repos.next_rev(self.normalize_rev(rev), path)
418
419 - def _next_prev_rev(self, direction, rev, path=''):
420 srev = self.db_rev(rev) 421 with self.env.db_query as db: 422 # the changeset revs are sequence of ints: 423 sql = "SELECT %(aggr)s(rev) FROM %(tab)s " \ 424 "WHERE repos=%%s AND rev%(dir)s%%s" 425 aggr = 'MAX' if direction == '<' else 'MIN' 426 args = [self.id, srev] 427 428 if path: 429 path = path.lstrip('/') 430 sql %= {'aggr': aggr, 'dir': direction, 'tab': 'node_change'} 431 # changes on path itself or its children 432 sql += " AND (path=%s OR path " + db.prefix_match() 433 args.extend((path, db.prefix_match_value(path + '/'))) 434 # deletion of path ancestors 435 components = path.lstrip('/').split('/') 436 parents = ','.join(('%s',) * len(components)) 437 sql += " OR (path IN (" + parents + ") AND change_type='D'))" 438 for i in range(1, len(components) + 1): 439 args.append('/'.join(components[:i])) 440 else: 441 sql %= {'aggr': aggr, 'dir': direction, 'tab': 'revision'} 442 443 for rev, in db(sql, args): 444 if rev is not None: 445 return int(rev)
446
447 - def parent_revs(self, rev):
448 if self.has_linear_changesets: 449 return Repository.parent_revs(self, rev) 450 else: 451 return self.repos.parent_revs(rev)
452
453 - def rev_older_than(self, rev1, rev2):
454 return self.repos.rev_older_than(self.normalize_rev(rev1), 455 self.normalize_rev(rev2))
456
457 - def get_path_history(self, path, rev=None, limit=None):
458 return self.repos.get_path_history(path, self.normalize_rev(rev), 459 limit)
460
461 - def normalize_path(self, path):
462 return self.repos.normalize_path(path)
463
464 - def normalize_rev(self, rev):
465 if rev is None or isinstance(rev, basestring) and \ 466 rev.lower() in ('', 'head', 'latest', 'youngest'): 467 return self.rev_db(self.youngest_rev or 0) 468 else: 469 try: 470 rev = int(rev) 471 if rev <= self.youngest_rev: 472 return rev 473 except (ValueError, TypeError): 474 pass 475 raise NoSuchChangeset(rev)
476
477 - def db_rev(self, rev):
478 """Convert a revision to its representation in the database.""" 479 return str(rev)
480
481 - def rev_db(self, rev):
482 """Convert a revision from its representation in the database.""" 483 return rev
484
485 - def get_changes(self, old_path, old_rev, new_path, new_rev, 486 ignore_ancestry=1):
487 return self.repos.get_changes(old_path, self.normalize_rev(old_rev), 488 new_path, self.normalize_rev(new_rev), 489 ignore_ancestry)
490
class CachedChangeset(Changeset):
    """Changeset read from the `revision` and `node_change` cache tables."""

    def __init__(self, repos, rev, env):
        """Load the cached record of `rev`.

        :param repos: the caching repository owning the changeset; it
                      provides the `id`, the `db_rev()` / `rev_db()`
                      conversions and the logger
        :param rev: the revision to load
        :param env: the environment, used for database access
        :raise NoSuchChangeset: if the `revision` table has no row for
                                `rev` in this repository
        """
        self.env = env
        drev = repos.db_rev(rev)
        rows = self.env.db_query("""
            SELECT time, author, message FROM revision
            WHERE repos=%s AND rev=%s
            """, (repos.id, drev))
        for timestamp, author, message in rows:
            # only the first matching row is taken into account
            when = from_utimestamp(timestamp)
            Changeset.__init__(self, repos, repos.rev_db(rev), message,
                               author, when)
            return
        repos.log.debug("Missing revision record (%r, %r) in '%s'",
                        repos.id, drev, _norm_reponame(repos))
        raise NoSuchChangeset(rev)

    def get_changes(self):
        """Generate `(path, kind, change, base_path, base_rev)` tuples for
        the cached node changes of this changeset, in sorted order.

        The one-letter codes stored in the database are translated back
        using `_kindmap` and `_actionmap`.
        """
        repos = self.repos
        rows = self.env.db_query("""
            SELECT path, node_type, change_type, base_path, base_rev
            FROM node_change WHERE repos=%s AND rev=%s
            ORDER BY path
            """, (repos.id, repos.db_rev(self.rev)))
        for path, node_type, change_type, base_path, base_rev in sorted(rows):
            kind = _kindmap[node_type]
            change = _actionmap[change_type]
            yield path, kind, change, base_path, repos.rev_db(base_rev)

    def get_properties(self):
        """Return the properties of the corresponding changeset of the
        wrapped repository (they are not cached).
        """
        wrapped = self.repos.repos
        cset = wrapped.get_changeset(self.rev)
        return cset.get_properties()
523