Package trac :: Package versioncontrol :: Module cache

Source Code for Module trac.versioncontrol.cache

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright (C) 2005-2023 Edgewall Software 
  4  # Copyright (C) 2005 Christopher Lenz <[email protected]> 
  5  # All rights reserved. 
  6  # 
  7  # This software is licensed as described in the file COPYING, which 
  8  # you should have received as part of this distribution. The terms 
  9  # are also available at https://trac.edgewall.org/wiki/TracLicense. 
 10  # 
 11  # This software consists of voluntary contributions made by many 
 12  # individuals. For the exact contribution history, see the revision 
 13  # history and logs, available at https://trac.edgewall.org/log/. 
 14  # 
 15  # Author: Christopher Lenz <[email protected]> 
 16   
 17  import os 
 18   
 19  from trac.cache import cached 
 20  from trac.core import TracError 
 21  from trac.util.datefmt import from_utimestamp, to_utimestamp 
 22  from trac.util.translation import _ 
 23  from trac.versioncontrol import Changeset, Node, Repository, NoSuchChangeset 
 24   
 25   
# One-letter codes used in the `node_change` table (`node_type` and
# `change_type` columns), mapped to the corresponding `Node` kind and
# `Changeset` action constants.
_kindmap = {'D': Node.DIRECTORY, 'F': Node.FILE}
_actionmap = {'A': Changeset.ADD, 'C': Changeset.COPY,
              'D': Changeset.DELETE, 'E': Changeset.EDIT,
              'M': Changeset.MOVE}
30 31 -def _invert_dict(d):
32 return dict(zip(d.values(), list(d)))
# Reverse lookups: `Node` kind / `Changeset` action constant -> one-letter
# code, used when writing `node_change` records.
_inverted_kindmap = _invert_dict(_kindmap)
_inverted_actionmap = _invert_dict(_actionmap)

# Names of the rows kept in the `repository` table for each cached
# repository.
CACHE_REPOSITORY_DIR = 'repository_dir'
CACHE_YOUNGEST_REV = 'youngest_rev'

# All metadata names that are read, reset and removed together.
CACHE_METADATA_KEYS = (CACHE_REPOSITORY_DIR, CACHE_YOUNGEST_REV)
41 42 43 -def _norm_reponame(repos):
44 return repos.reponame or '(default)'
45
class CachedRepository(Repository):
    """Repository wrapper backed by a database cache of changesets.

    Changeset metadata is stored in the `revision` table, per-changeset
    node changes in the `node_change` table, and bookkeeping values (see
    `CACHE_METADATA_KEYS`) in the `repository` table.  Everything that is
    not served from those tables is delegated to the wrapped `repos`.
    """

    # When true, `next_rev` and `parent_revs` are answered without asking
    # the wrapped repository (see those methods).
    has_linear_changesets = False

    scope = property(lambda self: self.repos.scope)

    def __init__(self, env, repos, log):
        """Wrap `repos`, using `env` for database access.

        :param env: environment providing `db_query` / `db_transaction`
        :param repos: the underlying repository being cached
        :param log: logger passed on to `Repository.__init__`
        """
        self.env = env
        self.repos = repos
        # Key used by the `@cached` decorator of `metadata`.
        self._metadata_id = str(self.repos.id)
        Repository.__init__(self, repos.name, repos.params, log)

    def close(self):
        """Close the wrapped repository."""
        self.repos.close()

    def get_base(self):
        """Delegate to the wrapped repository."""
        return self.repos.get_base()

    def get_quickjump_entries(self, rev):
        """Delegate to the wrapped repository, after normalizing `rev`."""
        return self.repos.get_quickjump_entries(self.normalize_rev(rev))

    def get_path_url(self, path, rev):
        """Delegate to the wrapped repository (`rev` is passed as is)."""
        return self.repos.get_path_url(path, rev)

    def get_changeset(self, rev):
        """Return the `CachedChangeset` for `rev`.

        `rev` is normalized first; `NoSuchChangeset` is raised by
        `normalize_rev` or by `CachedChangeset` when there is no matching
        record.
        """
        return CachedChangeset(self, self.normalize_rev(rev), self.env)

    def get_changeset_uid(self, rev):
        """Delegate to the wrapped repository (`rev` is passed as is)."""
        return self.repos.get_changeset_uid(rev)

    def get_changesets(self, start, stop):
        """Generate the cached changesets with `start` <= time < `stop`.

        Changesets are yielded most recent first.  Revisions for which
        `get_changeset` raises `NoSuchChangeset` are skipped.
        """
        for rev, in self.env.db_query("""
                SELECT rev FROM revision
                WHERE repos=%s AND time >= %s AND time < %s
                ORDER BY time DESC, rev DESC
                """, (self.id, to_utimestamp(start), to_utimestamp(stop))):
            try:
                yield self.get_changeset(rev)
            except NoSuchChangeset:
                pass  # skip changesets currently being resync'ed

    def sync_changeset(self, rev):
        """Refresh the `revision` record of a single changeset.

        The changeset is read from the wrapped repository.  If a cached
        record already exists, its time, author and message are updated;
        otherwise the changeset is inserted with `insert_changeset`.

        :return: the previously cached `CachedChangeset`, or `None` if
                 there was none
        """
        cset = self.repos.get_changeset(rev)
        srev = self.db_rev(cset.rev)
        old_cset = None

        with self.env.db_transaction as db:
            try:
                old_cset = CachedChangeset(self, cset.rev, self.env)
            except NoSuchChangeset:
                old_cset = None
            if old_cset:
                db("""UPDATE revision SET time=%s, author=%s, message=%s
                      WHERE repos=%s AND rev=%s
                      """, (to_utimestamp(cset.date), cset.author,
                            cset.message, self.id, srev))
            else:
                self.insert_changeset(cset.rev, cset)
        return old_cset

    @cached('_metadata_id')
    def metadata(self):
        """Retrieve data for the cached `metadata` attribute.

        The result is a dict of the `repository` table rows of this
        repository whose name is one of `CACHE_METADATA_KEYS`.
        """
        return dict(self.env.db_query("""
                SELECT name, value FROM repository
                WHERE id=%%s AND name IN (%s)
                """ % ','.join(['%s'] * len(CACHE_METADATA_KEYS)),
                (self.id,) + CACHE_METADATA_KEYS))

    def sync(self, feedback=None, clean=False):
        """Bring the cache up to date with the wrapped repository.

        :param feedback: optional callable, invoked with each revision
                         once it has been cached
        :param clean: if true, the cache is removed (`remove_cache`)
                      before synchronizing

        The method returns early, without caching anything, when the
        repository is empty, when there is nothing newer to cache, when
        the next revision to cache already has a `revision` record
        (another resync is presumably in progress), or when inserting a
        changeset fails.
        """
        if clean:
            self.remove_cache()

        metadata = self.metadata
        self.save_metadata(metadata)

        # -- retrieve the youngest revision in the repository and the youngest
        #    revision cached so far
        self.repos.clear()
        repos_youngest = self.repos.youngest_rev
        youngest = metadata.get(CACHE_YOUNGEST_REV)

        # -- verify and normalize youngest revision
        if youngest:
            youngest = self.repos.normalize_rev(youngest)
            if not youngest:
                self.log.debug("normalize_rev failed (youngest_rev=%r, "
                               "reponame=%s)",
                               self.youngest_rev, _norm_reponame(self))
        else:
            self.log.debug("cache metadata undefined (youngest_rev=%r, "
                           "reponame=%s)",
                           self.youngest_rev, _norm_reponame(self))
            youngest = None

        # -- compare them and try to resync if different
        next_youngest = None
        if youngest != repos_youngest:
            self.log.info("repos rev [%s] != cached rev [%s] in '%s'",
                          repos_youngest, youngest, _norm_reponame(self))
            if youngest:
                next_youngest = self.repos.next_rev(youngest)
            else:
                try:
                    next_youngest = self.repos.oldest_rev
                    # Ugly hack needed because doing that everytime in
                    # oldest_rev suffers from horrendeous performance (#5213)
                    if self.repos.scope != '/' and not \
                            self.repos.has_node('/', next_youngest):
                        next_youngest = self.repos.next_rev(
                            next_youngest, find_initial_rev=True)
                    next_youngest = self.repos.normalize_rev(next_youngest)
                except TracError:
                    # can't normalize oldest_rev: repository was empty
                    return

            if next_youngest is None:  # nothing to cache yet
                return
            srev = self.db_rev(next_youngest)

            # 0. first check if there's no (obvious) resync in progress
            with self.env.db_query as db:
                for rev, in db(
                        "SELECT rev FROM revision WHERE repos=%s AND rev=%s",
                        (self.id, srev)):
                    # already there, but in progress, so keep ''previous''
                    # notion of 'youngest'
                    self.repos.clear(youngest_rev=youngest)
                    return

            # prepare for resyncing (there might still be a race
            # condition at this point)
            while next_youngest is not None:
                srev = self.db_rev(next_youngest)

                with self.env.db_transaction as db:
                    self.log.info("Trying to sync revision [%s] in '%s'",
                                  next_youngest, _norm_reponame(self))
                    cset = self.repos.get_changeset(next_youngest)
                    try:
                        # steps 1. and 2.
                        self.insert_changeset(next_youngest, cset)
                    except Exception as e:  # *another* 1.1. resync attempt won
                        if isinstance(e, self.env.db_exc.IntegrityError):
                            self.log.warning("Revision %s in '%s' already "
                                             "cached: %r", next_youngest,
                                             _norm_reponame(self), e)
                        else:
                            self.log.error("Unable to create cache records "
                                           "for revision %s in '%s': %r",
                                           next_youngest, _norm_reponame(self),
                                           e)
                        # the other resync attempts is also
                        # potentially still in progress, so for our
                        # process/thread, keep ''previous'' notion of
                        # 'youngest'
                        self.repos.clear(youngest_rev=youngest)
                        # FIXME: This aborts a containing transaction
                        db.rollback()
                        return

                    # 3. update 'youngest_rev' metadata (minimize
                    # possibility of failures at point 0.)
                    db("""
                        UPDATE repository SET value=%s WHERE id=%s AND name=%s
                        """, (str(next_youngest), self.id, CACHE_YOUNGEST_REV))
                    del self.metadata

                    # 4. iterate (1. should always succeed now)
                    youngest = next_youngest
                    next_youngest = self.repos.next_rev(next_youngest)

                    # 5. provide some feedback
                    if feedback:
                        feedback(youngest)

    def remove_cache(self):
        """Remove the repository cache.

        All `revision` and `node_change` records of this repository are
        deleted and the metadata rows are reset to empty strings.
        """
        self.log.info("Cleaning cache in '%s'", _norm_reponame(self))
        with self.env.db_transaction as db:
            db("DELETE FROM revision WHERE repos=%s",
               (self.id,))
            db("DELETE FROM node_change WHERE repos=%s",
               (self.id,))
            db.executemany("DELETE FROM repository WHERE id=%s AND name=%s",
                           [(self.id, k) for k in CACHE_METADATA_KEYS])
            db.executemany("""
                  INSERT INTO repository (id, name, value)
                  VALUES (%s, %s, %s)
                  """, [(self.id, k, '') for k in CACHE_METADATA_KEYS])
            del self.metadata

    def save_metadata(self, metadata):
        """Save the repository metadata.

        :param metadata: dict as returned by the `metadata` attribute
        :raises TracError: if the stored 'repository_dir' differs from the
                           current repository name (compared with
                           `os.path.normcase`)

        Missing or empty rows are created or filled in, and the cached
        `metadata` attribute is invalidated whenever a row was written.
        """
        with self.env.db_transaction as db:
            invalidate = False

            # -- check that we're populating the cache for the correct
            #    repository
            repository_dir = metadata.get(CACHE_REPOSITORY_DIR)
            if repository_dir:
                # directory part of the repo name can vary on case insensitive
                # fs
                if os.path.normcase(repository_dir) \
                        != os.path.normcase(self.name):
                    self.log.info("'repository_dir' has changed from %r to %r",
                                  repository_dir, self.name)
                    raise TracError(_("The repository directory has changed, "
                                      "you should resynchronize the "
                                      "repository with: trac-admin $ENV "
                                      "repository resync '%(reponame)s'",
                                      reponame=_norm_reponame(self)))
            elif repository_dir is None:  # no row stored yet
                self.log.info('Storing initial "repository_dir": %s',
                              self.name)
                db("""INSERT INTO repository (id, name, value)
                      VALUES (%s, %s, %s)
                      """, (self.id, CACHE_REPOSITORY_DIR, self.name))
                invalidate = True
            else:  # 'repository_dir' cleared by a resync
                self.log.info('Resetting "repository_dir": %s', self.name)
                db("UPDATE repository SET value=%s WHERE id=%s AND name=%s",
                   (self.name, self.id, CACHE_REPOSITORY_DIR))
                invalidate = True

            # -- insert a 'youngeset_rev' for the repository if necessary
            if CACHE_YOUNGEST_REV not in metadata:
                db("""INSERT INTO repository (id, name, value)
                      VALUES (%s, %s, %s)
                      """, (self.id, CACHE_YOUNGEST_REV, ''))
                invalidate = True

            if invalidate:
                del self.metadata

    def insert_changeset(self, rev, cset):
        """Create revision and node_change records for the given changeset
        instance.

        :param rev: the revision being cached
        :param cset: changeset providing `date`, `author`, `message` and
                     `get_changes()`
        """
        srev = self.db_rev(rev)
        with self.env.db_transaction as db:
            # 1. Attempt to resync the 'revision' table.  In case of
            # concurrent syncs, only such insert into the `revision` table
            # will succeed, the others will fail and raise an exception.
            db("""
                INSERT INTO revision (repos,rev,time,author,message)
                VALUES (%s,%s,%s,%s,%s)
                """, (self.id, srev, to_utimestamp(cset.date),
                      cset.author, cset.message))
            # 2. now *only* one process was able to get there (i.e. there
            # *shouldn't* be any race condition here)
            for path, kind, action, bpath, brev in cset.get_changes():
                # The name is taken from `self`, like every other log
                # call in this class.
                self.log.debug("Caching node change in [%s] in '%s': %r",
                               rev, _norm_reponame(self),
                               (path, kind, action, bpath, brev))
                kind = _inverted_kindmap[kind]
                action = _inverted_actionmap[action]
                db("""
                    INSERT INTO node_change
                        (repos,rev,path,node_type,change_type,base_path,
                         base_rev)
                    VALUES (%s,%s,%s,%s,%s,%s,%s)
                    """, (self.id, srev, path, kind, action, bpath, brev))

    def get_node(self, path, rev=None):
        """Delegate to the wrapped repository, after normalizing `rev`."""
        return self.repos.get_node(path, self.normalize_rev(rev))

    def _get_node_revs(self, path, last=None, first=None):
        """Return the revisions affecting `path` between `first` and `last`
        revisions.

        When `first` is `None`, it defaults to the most recent revision up
        to `last` in which `path` was added, copied or moved, or 0 if
        there is no such record.  Changes to `path` itself and to anything
        below it are taken into account.
        """
        last = self.normalize_rev(last)
        slast = self.db_rev(last)
        # Called only to check that the node exists; the result is unused.
        self.get_node(path, last)
        with self.env.db_query as db:
            if first is None:
                first = db("""
                    SELECT MAX(rev) FROM node_change
                    WHERE repos=%s AND rev<=%s AND path=%s
                      AND change_type IN ('A', 'C', 'M')
                    """, (self.id, slast, path))
                first = int(first[0][0]) if first[0][0] is not None else 0
            sfirst = self.db_rev(first)
            return [int(rev) for rev, in db("""
                    SELECT DISTINCT rev FROM node_change
                    WHERE repos=%%s AND rev>=%%s AND rev<=%%s
                      AND (path=%%s OR path %s)""" % db.prefix_match(),
                    (self.id, sfirst, slast, path,
                     db.prefix_match_value(path + '/')))]

    def _get_changed_revs(self, node_infos):
        """Return the revisions in which each of the given nodes changed.

        :param node_infos: sequence of `(node, first)` pairs; for each
                           node, revisions from `first` (normalized) up to
                           `node.rev` are considered
        :return: dict mapping each `node.path` to the list of matching
                 revisions, `{}` if `node_infos` is empty
        """
        if not node_infos:
            return {}

        node_infos = [(node, self.normalize_rev(first)) for node, first
                                                        in node_infos]
        sfirst = self.db_rev(min(first for node, first in node_infos))
        slast = self.db_rev(max(node.rev for node, first in node_infos))
        path_infos = {node.path: (node, first) for node, first in node_infos}
        path_revs = {node.path: [] for node, first in node_infos}

        # Prevent "too many SQL variables" since max number of parameters is
        # 999 on SQLite. No limitation on PostgreSQL and MySQL.
        # Each node contributes 5 parameters to the query below, and 3 more
        # are used for the repository id and the revision range.
        idx = 0
        delta = (999 - 3) // 5
        with self.env.db_query as db:
            prefix_match = db.prefix_match()
            while idx < len(node_infos):
                subset = node_infos[idx:idx + delta]
                idx += delta
                count = len(subset)

                holders = ','.join(('%s',) * count)
                query = """\
                    SELECT DISTINCT
                      rev, (CASE WHEN path IN (%s) THEN path %s END) AS path
                    FROM node_change
                    WHERE repos=%%s AND rev>=%%s AND rev<=%%s
                      AND (path IN (%s) %s)
                    """ % \
                    (holders,
                     ' '.join(('WHEN path ' + prefix_match + ' THEN %s',)
                              * count),
                     holders,
                     ' '.join(('OR path ' + prefix_match,)
                              * count))
                # The arguments follow the order of the placeholders in
                # the query text.
                args = []
                args.extend(node.path for node, first in subset)
                for node, first in subset:
                    args.append(db.prefix_match_value(node.path + '/'))
                    args.append(node.path)
                args.extend((self.id, sfirst, slast))
                args.extend(node.path for node, first in subset)
                args.extend(db.prefix_match_value(node.path + '/')
                            for node, first in subset)

                for srev, path in db(query, args):
                    rev = self.rev_db(srev)
                    node, first = path_infos[path]
                    # The query covers the widest range over all nodes;
                    # keep only what falls in this node's own range.
                    if first <= rev <= node.rev:
                        path_revs[path].append(rev)

        return path_revs

    def has_node(self, path, rev=None):
        """Delegate to the wrapped repository, after normalizing `rev`."""
        return self.repos.has_node(path, self.normalize_rev(rev))

    def get_oldest_rev(self):
        """Return the `oldest_rev` of the wrapped repository."""
        return self.repos.oldest_rev

    def get_youngest_rev(self):
        """Return the youngest revision recorded in the cache metadata."""
        return self.rev_db(self.metadata.get(CACHE_YOUNGEST_REV))

    def previous_rev(self, rev, path=''):
        """Delegate to the wrapped repository, after normalizing `rev`."""
        # Hitting the repository directly is faster than searching the
        # database.  When there is a long stretch of inactivity on a file (in
        # particular, when a file is added late in the history) the database
        # query can take a very long time to determine that there is no
        # previous revision in the node_changes table.  However, the repository
        # will have a datastructure that will allow it to find the previous
        # version of a node fairly directly.
        #if self.has_linear_changesets:
        #    return self._next_prev_rev('<', rev, path)
        return self.repos.previous_rev(self.normalize_rev(rev), path)

    def next_rev(self, rev, path=''):
        """Return the revision following `rev`, optionally for `path`.

        Answered from the cache when `has_linear_changesets` is true,
        otherwise delegated to the wrapped repository.
        """
        if self.has_linear_changesets:
            return self._next_prev_rev('>', rev, path)
        else:
            return self.repos.next_rev(self.normalize_rev(rev), path)

    def _next_prev_rev(self, direction, rev, path=''):
        """Look up the revision adjacent to `rev` in the cache tables.

        :param direction: ``'<'`` selects the greatest revision before
                          `rev`, any other value (``'>'`` is what
                          `next_rev` passes) the smallest revision after it
        :param path: if non-empty, only revisions recorded in
                     `node_change` for `path`, for anything below it, or
                     as a deletion of `path` or one of its ancestors are
                     considered; otherwise the `revision` table is used
        :return: the revision as an `int`, or `None` if there is none
        """
        srev = self.db_rev(rev)
        with self.env.db_query as db:
            # the changeset revs are sequence of ints:
            sql = "SELECT %(aggr)s(rev) FROM %(tab)s " \
                  "WHERE repos=%%s AND rev%(dir)s%%s"
            aggr = 'MAX' if direction == '<' else 'MIN'
            args = [self.id, srev]

            if path:
                path = path.lstrip('/')
                sql %= {'aggr': aggr, 'dir': direction, 'tab': 'node_change'}
                # changes on path itself or its children
                sql += " AND (path=%s OR path " + db.prefix_match()
                args.extend((path, db.prefix_match_value(path + '/')))
                # deletion of path ancestors (`path` has already been
                # stripped of leading slashes above)
                components = path.split('/')
                parents = ','.join(('%s',) * len(components))
                sql += " OR (path IN (" + parents + ") AND change_type='D'))"
                args.extend('/'.join(components[:i])
                            for i in range(1, len(components) + 1))
            else:
                sql %= {'aggr': aggr, 'dir': direction, 'tab': 'revision'}

            for found, in db(sql, args):
                if found is not None:
                    return int(found)
        return None

    def parent_revs(self, rev):
        """Return the parent revisions of `rev`.

        Uses the base `Repository` implementation when
        `has_linear_changesets` is true, otherwise delegates to the
        wrapped repository.
        """
        if self.has_linear_changesets:
            return Repository.parent_revs(self, rev)
        else:
            return self.repos.parent_revs(rev)

    def rev_older_than(self, rev1, rev2):
        """Delegate to the wrapped repository, after normalizing both
        revisions."""
        return self.repos.rev_older_than(self.normalize_rev(rev1),
                                         self.normalize_rev(rev2))

    def get_path_history(self, path, rev=None, limit=None):
        """Delegate to the wrapped repository, after normalizing `rev`."""
        return self.repos.get_path_history(path, self.normalize_rev(rev),
                                           limit)

    def normalize_path(self, path):
        """Delegate to the wrapped repository."""
        return self.repos.normalize_path(path)

    def normalize_rev(self, rev):
        """Return `rev` as a revision known to the cache.

        `None`, the empty string and the names 'head', 'latest' and
        'youngest' (case-insensitive) stand for the youngest cached
        revision.  Anything else must convert to an `int` not greater
        than `youngest_rev`.

        :raises NoSuchChangeset: if `rev` cannot be converted or is
                                 greater than `youngest_rev`
        """
        if rev is None or isinstance(rev, basestring) and \
                rev.lower() in ('', 'head', 'latest', 'youngest'):
            return self.rev_db(self.youngest_rev or 0)
        else:
            try:
                rev = int(rev)
                if rev <= self.youngest_rev:
                    return rev
            except (ValueError, TypeError):
                pass
            raise NoSuchChangeset(rev)

    def db_rev(self, rev):
        """Convert a revision to its representation in the database."""
        return str(rev)

    def rev_db(self, rev):
        """Convert a revision from its representation in the database."""
        return rev

    def get_changes(self, old_path, old_rev, new_path, new_rev,
                    ignore_ancestry=1):
        """Delegate to the wrapped repository, after normalizing both
        revisions."""
        return self.repos.get_changes(old_path, self.normalize_rev(old_rev),
                                      new_path, self.normalize_rev(new_rev),
                                      ignore_ancestry)
488
class CachedChangeset(Changeset):
    """Changeset whose data is read from the `revision` and `node_change`
    tables instead of the underlying repository."""

    def __init__(self, repos, rev, env):
        """Load the `revision` record of `rev` for the repository `repos`.

        :raises NoSuchChangeset: if there is no such record
        """
        self.env = env
        drev = repos.db_rev(rev)
        rows = self.env.db_query("""
                SELECT time, author, message FROM revision
                WHERE repos=%s AND rev=%s
                """, (repos.id, drev))
        # Only the first matching record, if any, is used.
        record = next(iter(rows), None)
        if record is None:
            repos.log.debug("Missing revision record (%r, %r) in '%s'",
                            repos.id, drev, _norm_reponame(repos))
            raise NoSuchChangeset(rev)
        timestamp, author, message = record
        date = from_utimestamp(timestamp)
        Changeset.__init__(self, repos, repos.rev_db(rev), message, author,
                           date)

    def get_changes(self):
        """Generate `(path, kind, change, base_path, base_rev)` tuples for
        the node changes recorded for this changeset, in sorted order."""
        repos = self.repos
        records = self.env.db_query("""
                SELECT path, node_type, change_type, base_path, base_rev
                FROM node_change WHERE repos=%s AND rev=%s
                ORDER BY path
                """, (repos.id, repos.db_rev(self.rev)))
        for path, node_type, change_type, base_path, base_rev \
                in sorted(records):
            # Translate the one-letter database codes back to constants.
            yield (path, _kindmap[node_type], _actionmap[change_type],
                   base_path, repos.rev_db(base_rev))

    def get_properties(self):
        """Return the properties of this changeset, as reported by the
        repository wrapped by the cached repository."""
        backend = self.repos.repos
        uncached = backend.get_changeset(self.rev)
        return uncached.get_properties()
521