| Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2005-2020 Edgewall Software
4 # Copyright (C) 2005 Christopher Lenz <[email protected]>
5 # All rights reserved.
6 #
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at https://trac.edgewall.org/wiki/TracLicense.
10 #
11 # This software consists of voluntary contributions made by many
12 # individuals. For the exact contribution history, see the revision
13 # history and logs, available at https://trac.edgewall.org/log/.
14 #
15 # Author: Christopher Lenz <[email protected]>
16
17 from __future__ import with_statement
18
19 import os
20
21 from trac.cache import cached
22 from trac.core import TracError
23 from trac.util.datefmt import from_utimestamp, to_utimestamp
24 from trac.util.translation import _
25 from trac.versioncontrol import Changeset, Node, Repository, NoSuchChangeset
26
27
# Single-letter codes stored in the `node_change.node_type` column, mapped to
# the corresponding `Node` kind constants.
28 _kindmap = {'D': Node.DIRECTORY, 'F': Node.FILE}
# Single-letter codes stored in the `node_change.change_type` column, mapped
# to the corresponding `Changeset` action constants.
29 _actionmap = {'A': Changeset.ADD, 'C': Changeset.COPY,
30 'D': Changeset.DELETE, 'E': Changeset.EDIT,
31 'M': Changeset.MOVE}
35
# NOTE(review): `_invert_dict` is not defined in the visible part of this
# listing (its definition appears to have been dropped by the extraction);
# presumably it swaps keys and values -- confirm against the original file.
# The inverted maps are used to turn constants back into column codes before
# rows are inserted into `node_change`.
36 _inverted_kindmap = _invert_dict(_kindmap)
37 _inverted_actionmap = _invert_dict(_actionmap)
38
# Values of the `name` column for the metadata rows kept in the `repository`
# table.
39 CACHE_REPOSITORY_DIR = 'repository_dir'
40 CACHE_YOUNGEST_REV = 'youngest_rev'
41
# All metadata keys: used to build the `IN (...)` list when metadata is read
# and to delete/re-create the rows when the cache is removed.
42 CACHE_METADATA_KEYS = (CACHE_REPOSITORY_DIR, CACHE_YOUNGEST_REV)
47
50
# NOTE(review): the enclosing `class` statement is missing from this extracted
# listing; the two assignments below are class-level attributes.
# Flag consulted by the revision-navigation code further down to choose
# between a database lookup / base-class behaviour and delegation to
# `self.repos`.
51 has_linear_changesets = False
52
# Read-only property forwarding `scope` to the wrapped repository.
53 scope = property(lambda self: self.repos.scope)
54
# NOTE(review): the `def` line is missing from this extracted listing; the
# body reads `env`, `repos` and `log`, so this is presumably the constructor
# taking those three arguments -- confirm against the original file.
# Keep the environment and the wrapped repository.
56 self.env = env
57 self.repos = repos
# String form of the wrapped repository's id; `_metadata_id` is the attribute
# name passed to the `@cached(...)` decorator further down.
58 self._metadata_id = str(self.repos.id)
# Initialise the base class with the wrapped repository's name and params.
59 Repository.__init__(self, repos.name, repos.params, log)
60
# NOTE(review): `def` line missing from this listing (presumably `close`).
# Closes the wrapped repository.
62 self.repos.close()
63
# NOTE(review): `def` line missing from this listing (presumably `get_base`).
# Returns whatever the wrapped repository reports as its base.
65 return self.repos.get_base()
66
69
72
75
# NOTE(review): `def` line missing from this listing (presumably
# `get_changeset_uid`, taking `rev`).
# Delegates to the wrapped repository without normalizing `rev` first.
77 return self.repos.get_changeset_uid(rev)
78
# NOTE(review): `def` line missing from this listing; the body reads `start`
# and `stop` (presumably `get_changesets(self, start, stop)`).
# Generator: yields a changeset for every cached `revision` row of this
# repository whose `time` lies in the half-open interval [start, stop),
# ordered by time and then rev, both descending.
80 for rev, in self.env.db_query("""
81 SELECT rev FROM revision
82 WHERE repos=%s AND time >= %s AND time < %s
83 ORDER BY time DESC, rev DESC
84 """, (self.id, to_utimestamp(start), to_utimestamp(stop))):
85 try:
86 yield self.get_changeset(rev)
87 except NoSuchChangeset:
88 pass # skip changesets currently being resync'ed
89
# NOTE(review): `def` line missing from this listing; the body reads `rev`
# (presumably `sync_changeset(self, rev)`).
# Refreshes the cached record of a single changeset from the wrapped
# repository. Returns a `Changeset` built from the values that were cached
# before the update, or None when no row existed for that revision.
91 cset = self.repos.get_changeset(rev)
92 srev = self.db_rev(cset.rev)
93 old_cset = None
94
95 with self.env.db_transaction as db:
# Capture the currently cached time/author/message, if any.
96 for time, author, message in db("""
97 SELECT time, author, message FROM revision
98 WHERE repos=%s AND rev=%s
99 """, (self.id, srev)):
100 old_cset = Changeset(self.repos, cset.rev, message, author,
101 from_utimestamp(time))
# Existing row: overwrite it with the repository's current values.
102 if old_cset:
103 db("""UPDATE revision SET time=%s, author=%s, message=%s
104 WHERE repos=%s AND rev=%s
105 """, (to_utimestamp(cset.date), cset.author,
106 cset.message, self.id, srev))
# No row yet: insert the revision together with its node changes.
107 else:
108 self._insert_changeset(db, cset.rev, cset)
109 return old_cset
110
# NOTE(review): the `def` line between the decorator and the docstring is
# missing from this listing; the docstring names the attribute `metadata`.
# The decorator argument names the instance attribute set in the constructor
# body (`_metadata_id`).
111 @cached('_metadata_id')
113 """Retrieve data for the cached `metadata` attribute."""
# Builds a dict from the (name, value) rows of the `repository` table for
# this repository id, restricted to CACHE_METADATA_KEYS. `%%s` survives the
# Python `%` formatting as a `%s` placeholder for the id, while the single
# `%s` is expanded into one placeholder per metadata key.
114 return dict(self.env.db_query("""
115 SELECT name, value FROM repository
116 WHERE id=%%s AND name IN (%s)
117 """ % ','.join(['%s'] * len(CACHE_METADATA_KEYS)),
118 (self.id,) + CACHE_METADATA_KEYS))
119
# NOTE(review): `def` line missing from this listing; the body reads `clean`
# and `feedback` (presumably `sync(self, feedback=None, clean=False)`).
# Brings the cache up to date with the wrapped repository: starting after the
# cached youngest revision, each missing revision is inserted in its own
# transaction and the 'youngest_rev' metadata row is advanced after each one.
# The statement order below implements a best-effort protocol against
# concurrent resyncs; see the numbered step comments.
# NOTE(review): uses Python 2-only syntax (`except Exception, e`).
121 if clean:
122 self.remove_cache()
123
# Read the cached metadata, then make sure the expected rows exist.
124 metadata = self.metadata
125 self.save_metadata(metadata)
126
127 # -- retrieve the youngest revision in the repository and the youngest
128 # revision cached so far
129 self.repos.clear()
130 repos_youngest = self.repos.youngest_rev
131 youngest = metadata.get(CACHE_YOUNGEST_REV)
132
133 # -- verify and normalize youngest revision
134 if youngest:
135 youngest = self.repos.normalize_rev(youngest)
136 if not youngest:
137 self.log.debug("normalize_rev failed (youngest_rev=%r, "
138 "reponame=%s)",
139 self.youngest_rev, _norm_reponame(self))
140 else:
141 self.log.debug("cache metadata undefined (youngest_rev=%r, "
142 "reponame=%s)",
143 self.youngest_rev, _norm_reponame(self))
144 youngest = None
145
146 # -- compare them and try to resync if different
147 next_youngest = None
148 if youngest != repos_youngest:
149 self.log.info("repos rev [%s] != cached rev [%s] in '%s'",
150 repos_youngest, youngest, _norm_reponame(self))
# Something is cached already: continue with the revision right after it.
151 if youngest:
152 next_youngest = self.repos.next_rev(youngest)
# Nothing cached yet: start from the repository's oldest revision.
153 else:
154 try:
155 next_youngest = self.repos.oldest_rev
156 # Ugly hack needed because doing that every time in
157 # oldest_rev suffers from horrendous performance (#5213)
158 if self.repos.scope != '/' and not \
159 self.repos.has_node('/', next_youngest):
160 next_youngest = self.repos.next_rev(next_youngest,
161 find_initial_rev=True)
162 next_youngest = self.repos.normalize_rev(next_youngest)
163 except TracError:
164 # can't normalize oldest_rev: repository was empty
165 return
166
167 if next_youngest is None: # nothing to cache yet
168 return
169 srev = self.db_rev(next_youngest)
170
171 # 0. first check if there's no (obvious) resync in progress
172 with self.env.db_query as db:
173 for rev, in db(
174 "SELECT rev FROM revision WHERE repos=%s AND rev=%s",
175 (self.id, srev)):
176 # already there, but in progress, so keep ''previous''
177 # notion of 'youngest'
178 self.repos.clear(youngest_rev=youngest)
179 return
180
181 # prepare for resyncing (there might still be a race
182 # condition at this point)
183 while next_youngest is not None:
184 srev = self.db_rev(next_youngest)
185
# One transaction per revision.
186 with self.env.db_transaction as db:
187 self.log.info("Trying to sync revision [%s] in '%s'",
188 next_youngest, _norm_reponame(self))
189 cset = self.repos.get_changeset(next_youngest)
190 try:
191 # steps 1. and 2.
192 self._insert_changeset(db, next_youngest, cset)
193 except Exception, e: # *another* 1.1. resync attempt won
# An IntegrityError is only logged as a warning (row already cached);
# any other exception is logged as an error. Both paths then abort the
# sync below.
194 if isinstance(e, self.env.db_exc.IntegrityError):
195 self.log.warning("Revision %s in '%s' already "
196 "cached: %r", next_youngest,
197 _norm_reponame(self), e)
198 else:
199 self.log.error("Unable to create cache records "
200 "for revision %s in '%s': %r",
201 next_youngest, _norm_reponame(self),
202 e)
203 # the other resync attempt is also
204 # potentially still in progress, so for our
205 # process/thread, keep ''previous'' notion of
206 # 'youngest'
207 self.repos.clear(youngest_rev=youngest)
208 # FIXME: This aborts a containing transaction
209 db.rollback()
210 return
211
212 # 3. update 'youngest_rev' metadata (minimize
213 # possibility of failures at point 0.)
214 db("""
215 UPDATE repository SET value=%s WHERE id=%s AND name=%s
216 """, (str(next_youngest), self.id, CACHE_YOUNGEST_REV))
# Drop the cached `metadata` value so the next access re-reads it.
217 del self.metadata
218
219 # 4. iterate (1. should always succeed now)
220 youngest = next_youngest
221 next_youngest = self.repos.next_rev(next_youngest)
222
223 # 5. provide some feedback
224 if feedback:
225 feedback(youngest)
226
# NOTE(review): `def` line missing from this listing; called elsewhere in
# this file as `self.remove_cache()`.
228 """Remove the repository cache."""
229 self.log.info("Cleaning cache in '%s'", _norm_reponame(self))
# All deletions and the re-creation of the metadata rows happen in one
# transaction.
230 with self.env.db_transaction as db:
231 db("DELETE FROM revision WHERE repos=%s",
232 (self.id,))
233 db("DELETE FROM node_change WHERE repos=%s",
234 (self.id,))
# Reset each metadata row: delete it, then re-insert it with an empty value.
235 db.executemany("DELETE FROM repository WHERE id=%s AND name=%s",
236 [(self.id, k) for k in CACHE_METADATA_KEYS])
237 db.executemany("""
238 INSERT INTO repository (id, name, value)
239 VALUES (%s, %s, %s)
240 """, [(self.id, k, '') for k in CACHE_METADATA_KEYS])
# Drop the cached `metadata` value so the next access re-reads it.
241 del self.metadata
242
# NOTE(review): `def` line missing from this listing; called elsewhere in
# this file as `self.save_metadata(metadata)`, where `metadata` is the dict
# read from the cached `metadata` attribute.
# Raises TracError when the stored 'repository_dir' differs from `self.name`.
244 """Save the repository metadata."""
245 with self.env.db_transaction as db:
# Set whenever a row is written, so the cached metadata is dropped at the end.
246 invalidate = False
247
248 # -- check that we're populating the cache for the correct
249 # repository
250 repository_dir = metadata.get(CACHE_REPOSITORY_DIR)
251 if repository_dir:
252 # directory part of the repo name can vary on case insensitive
253 # fs
254 if os.path.normcase(repository_dir) \
255 != os.path.normcase(self.name):
256 self.log.info("'repository_dir' has changed from %r to %r",
257 repository_dir, self.name)
258 raise TracError(_("The repository directory has changed, "
259 "you should resynchronize the "
260 "repository with: trac-admin $ENV "
261 "repository resync '%(reponame)s'",
262 reponame=_norm_reponame(self)))
263 elif repository_dir is None: # key absent from metadata: insert the row
264 self.log.info('Storing initial "repository_dir": %s',
265 self.name)
266 db("""INSERT INTO repository (id, name, value)
267 VALUES (%s, %s, %s)
268 """, (self.id, CACHE_REPOSITORY_DIR, self.name))
269 invalidate = True
270 else: # 'repository_dir' cleared by a resync
271 self.log.info('Resetting "repository_dir": %s', self.name)
272 db("UPDATE repository SET value=%s WHERE id=%s AND name=%s",
273 (self.name, self.id, CACHE_REPOSITORY_DIR))
274 invalidate = True
275
276 # -- insert a 'youngest_rev' for the repository if necessary
277 if CACHE_YOUNGEST_REV not in metadata:
278 db("""INSERT INTO repository (id, name, value)
279 VALUES (%s, %s, %s)
280 """, (self.id, CACHE_YOUNGEST_REV, ''))
281 invalidate = True
282
# Drop the cached `metadata` value only if a row was actually written.
283 if invalidate:
284 del self.metadata
285
# NOTE(review): `def` line missing from this listing; called elsewhere in
# this file as `self._insert_changeset(db, rev, cset)`.
# Inserts one `revision` row for `cset`, then one `node_change` row per entry
# returned by `cset.get_changes()`, all through the caller-supplied `db`.
287 srev = self.db_rev(rev)
288 # 1. Attempt to resync the 'revision' table. In case of
289 # concurrent syncs, only such insert into the `revision` table
290 # will succeed, the others will fail and raise an exception.
291 db("""
292 INSERT INTO revision (repos,rev,time,author,message)
293 VALUES (%s,%s,%s,%s,%s)
294 """, (self.id, srev, to_utimestamp(cset.date),
295 cset.author, cset.message))
296 # 2. now *only* one process was able to get there (i.e. there
297 # *shouldn't* be any race condition here)
298 for path, kind, action, bpath, brev in cset.get_changes():
299 self.log.debug("Caching node change in [%s] in '%s': %r",
300 rev, _norm_reponame(self.repos),
301 (path, kind, action, bpath, brev))
# Convert the kind/action constants to their single-letter column codes.
302 kind = _inverted_kindmap[kind]
303 action = _inverted_actionmap[action]
304 db("""
305 INSERT INTO node_change
306 (repos,rev,path,node_type,change_type,base_path,
307 base_rev)
308 VALUES (%s,%s,%s,%s,%s,%s,%s)
309 """, (self.id, srev, path, kind, action, bpath, brev))
310
313
# NOTE(review): `def` line missing from this listing; the body reads `path`,
# `first` and `last` (presumably `_get_node_revs(self, path, last=None,
# first=None)` -- confirm against the original file).
315 """Return the revisions affecting `path` between `first` and `last`
316 revisions.
317 """
318 last = self.normalize_rev(last)
319 slast = self.db_rev(last)
320 node = self.get_node(path, last) # Check node existence
321 with self.env.db_query as db:
# Without an explicit `first`, start at the most recent revision <= `last`
# in which `path` was recorded with change type 'A', 'C' or 'M'; fall back to
# 0 when the query yields NULL.
322 if first is None:
323 first = db("""
324 SELECT MAX(rev) FROM node_change
325 WHERE repos=%s AND rev<=%s AND path=%s
326 AND change_type IN ('A', 'C', 'M')
327 """, (self.id, slast, path))
328 first = int(first[0][0]) if first[0][0] is not None else 0
329 sfirst = self.db_rev(first)
# Distinct revisions in [first, last] that touch `path` itself or any path
# matching the prefix `path + '/'`; `%%s` survives the `%` formatting as
# `%s`, the single `%s` receives the backend's prefix-match SQL fragment.
330 return [int(rev) for rev, in db("""
331 SELECT DISTINCT rev FROM node_change
332 WHERE repos=%%s AND rev>=%%s AND rev<=%%s
333 AND (path=%%s OR path %s)""" % db.prefix_match(),
334 (self.id, sfirst, slast, path,
335 db.prefix_match_value(path + '/')))]
336
# NOTE(review): `def` line missing from this listing; the body reads
# `node_infos` (presumably `_get_changed_revs(self, node_infos)`).
# `node_infos` is iterated as (node, first) pairs. Returns a dict mapping each
# node's path to the list of revisions in [first, node.rev] in which that
# path, or a path below it, has a `node_change` row.
338 if not node_infos:
339 return {}
340
341 node_infos = [(node, self.normalize_rev(first)) for node, first
342 in node_infos]
# Overall revision window covering every requested node.
343 sfirst = self.db_rev(min(first for node, first in node_infos))
344 slast = self.db_rev(max(node.rev for node, first in node_infos))
345 path_infos = dict((node.path, (node, first)) for node, first
346 in node_infos)
347 path_revs = dict((node.path, []) for node, first in node_infos)
348
349 db = self.env.get_read_db()
350 cursor = db.cursor()
351 prefix_match = db.prefix_match()
352
353 # Prevent "too many SQL variables" since max number of parameters is
354 # 999 on SQLite. No limitation on PostgreSQL and MySQL.
355 idx = 0
# Each node contributes 5 parameters to the query built below (path in the
# first IN list, prefix value and path in the WHEN/THEN pair, path in the
# second IN list, prefix value in the OR clause); 3 more are fixed (repos,
# first rev, last rev).
356 delta = (999 - 3) // 5
357 while idx < len(node_infos):
358 subset = node_infos[idx:idx + delta]
359 idx += delta
360 count = len(subset)
361
362 holders = ','.join(('%s',) * count)
# The CASE expression reports, for each row, the requested path it belongs
# to: the row's own path when it is one of the requested paths, otherwise the
# requested path whose prefix it matches.
363 query = """\
364 SELECT DISTINCT
365 rev, (CASE WHEN path IN (%s) THEN path %s END) AS path
366 FROM node_change
367 WHERE repos=%%s AND rev>=%%s AND rev<=%%s AND (path IN (%s) %s)
368 """ % \
369 (holders,
370 ' '.join(('WHEN path ' + prefix_match + ' THEN %s',) * count),
371 holders,
372 ' '.join(('OR path ' + prefix_match,) * count))
# Arguments must follow the placeholder order of the query text above.
373 args = []
374 args.extend(node.path for node, first in subset)
375 for node, first in subset:
376 args.append(db.prefix_match_value(node.path + '/'))
377 args.append(node.path)
378 args.extend((self.id, sfirst, slast))
379 args.extend(node.path for node, first in subset)
380 args.extend(db.prefix_match_value(node.path + '/')
381 for node, first in subset)
382 cursor.execute(query, args)
383
# The SQL window is the union over all nodes, so filter each row against the
# [first, node.rev] range of the node it was attributed to.
384 for srev, path in cursor:
385 rev = self.rev_db(srev)
386 node, first = path_infos[path]
387 if first <= rev <= node.rev:
388 path_revs[path].append(rev)
389
390 return path_revs
391
394
# NOTE(review): `def` line missing from this listing (presumably
# `get_oldest_rev`).
# The oldest revision is taken from the wrapped repository, not the cache.
396 return self.repos.oldest_rev
397
400
# NOTE(review): `def` line missing from this listing; the body reads `rev`
# and `path` (presumably `previous_rev(self, rev, path='')`).
402 # Hitting the repository directly is faster than searching the
403 # database. When there is a long stretch of inactivity on a file (in
404 # particular, when a file is added late in the history) the database
405 # query can take a very long time to determine that there is no
406 # previous revision in the node_changes table. However, the repository
407 # will have a data structure that will allow it to find the previous
408 # version of a node fairly directly.
409 #if self.has_linear_changesets:
410 # return self._next_prev_rev('<', rev, path)
# Always delegates to the wrapped repository, after normalizing `rev`.
411 return self.repos.previous_rev(self.normalize_rev(rev), path)
412
# NOTE(review): `def` line missing from this listing; the body reads `rev`
# and `path` (presumably `next_rev(self, rev, path='')`).
# With linear changesets the next revision is looked up in the database;
# otherwise the wrapped repository is asked, after normalizing `rev`.
414 if self.has_linear_changesets:
415 return self._next_prev_rev('>', rev, path)
416 else:
417 return self.repos.next_rev(self.normalize_rev(rev), path)
418
# NOTE(review): `def` line missing from this listing; called elsewhere in
# this file as `self._next_prev_rev(direction, rev, path)` with `direction`
# being '<' or '>'.
# Finds the closest cached revision before ('<', via MAX) or after ('>', via
# MIN) `rev`. With a `path`, the `node_change` table is searched; without
# one, the `revision` table. No explicit return is reached when the query
# yields NULL.
420 srev = self.db_rev(rev)
421 with self.env.db_query as db:
422 # the changeset revs are sequence of ints:
# `%(name)s` fields are filled in below; `%%s` stays as a query placeholder.
423 sql = "SELECT %(aggr)s(rev) FROM %(tab)s " \
424 "WHERE repos=%%s AND rev%(dir)s%%s"
425 aggr = 'MAX' if direction == '<' else 'MIN'
426 args = [self.id, srev]
427
428 if path:
429 path = path.lstrip('/')
430 sql %= {'aggr': aggr, 'dir': direction, 'tab': 'node_change'}
431 # changes on path itself or its children
432 sql += " AND (path=%s OR path " + db.prefix_match()
433 args.extend((path, db.prefix_match_value(path + '/')))
434 # deletion of path ancestors
435 components = path.lstrip('/').split('/')
436 parents = ','.join(('%s',) * len(components))
437 sql += " OR (path IN (" + parents + ") AND change_type='D'))"
# One argument per cumulative prefix of the path: 'a', 'a/b', 'a/b/c', ...
438 for i in range(1, len(components) + 1):
439 args.append('/'.join(components[:i]))
440 else:
441 sql %= {'aggr': aggr, 'dir': direction, 'tab': 'revision'}
442
443 for rev, in db(sql, args):
444 if rev is not None:
445 return int(rev)
446
# NOTE(review): `def` line missing from this listing; the body reads `rev`
# (presumably `parent_revs(self, rev)`).
# With linear changesets the base-class implementation is used; otherwise the
# wrapped repository is asked (note: `rev` is passed through unnormalized).
448 if self.has_linear_changesets:
449 return Repository.parent_revs(self, rev)
450 else:
451 return self.repos.parent_revs(rev)
452
456
460
463
# NOTE(review): `def` line missing from this listing; the body reads `rev`
# (presumably `normalize_rev(self, rev)`).
# None and the strings '', 'head', 'latest', 'youngest' (case-insensitive)
# resolve to the cached youngest revision, or 0 when that is falsy. Anything
# else must convert to an int not greater than `self.youngest_rev`; otherwise
# NoSuchChangeset is raised.
# NOTE(review): `basestring` is Python 2-only.
465 if rev is None or isinstance(rev, basestring) and \
466 rev.lower() in ('', 'head', 'latest', 'youngest'):
467 return self.rev_db(self.youngest_rev or 0)
468 else:
469 try:
470 rev = int(rev)
471 if rev <= self.youngest_rev:
472 return rev
473 except (ValueError, TypeError):
474 pass
# Reached for non-integer input and for integers beyond the youngest revision.
475 raise NoSuchChangeset(rev)
476
480
484
# NOTE(review): `def` line missing from this listing; the body reads
# `old_path`, `old_rev`, `new_path`, `new_rev` and `ignore_ancestry`
# (presumably `get_changes` with those parameters).
# Delegates to the wrapped repository after normalizing both revisions.
487 return self.repos.get_changes(old_path, self.normalize_rev(old_rev),
488 new_path, self.normalize_rev(new_rev),
489 ignore_ancestry)
490
493
# NOTE(review): both the `class` statement and the `def` line are missing from
# this listing; the body reads `repos`, `rev` and `env` and calls
# `Changeset.__init__`, so this is presumably the constructor of a `Changeset`
# subclass backed by the cache tables.
# Raises NoSuchChangeset when the `revision` table has no row for `rev`.
495 self.env = env
496 drev = repos.db_rev(rev)
497 for _date, author, message in self.env.db_query("""
498 SELECT time, author, message FROM revision
499 WHERE repos=%s AND rev=%s
500 """, (repos.id, drev)):
501 date = from_utimestamp(_date)
502 Changeset.__init__(self, repos, repos.rev_db(rev), message, author,
503 date)
# Only the first matching row is used.
504 break
# for/else: runs only when the loop finished without `break`, i.e. no row.
505 else:
506 repos.log.debug("Missing revision record (%r, %r) in '%s'",
507 repos.id, drev, _norm_reponame(repos))
508 raise NoSuchChangeset(rev)
509
# NOTE(review): `def` line missing from this listing (presumably
# `get_changes(self)` of the cached changeset class).
# Generator: yields (path, kind, change, base_path, base_rev) for every
# `node_change` row of this changeset. Rows are ordered by the query and
# additionally sorted in Python; the single-letter column codes are mapped
# back to `Node`/`Changeset` constants, and `base_rev` is passed through
# `repos.rev_db`.
511 for path, kind, change, base_path, base_rev in sorted(
512 self.env.db_query("""
513 SELECT path, node_type, change_type, base_path, base_rev
514 FROM node_change WHERE repos=%s AND rev=%s
515 ORDER BY path
516 """, (self.repos.id, self.repos.db_rev(self.rev)))):
517 kind = _kindmap[kind]
518 change = _actionmap[change]
519 yield path, kind, change, base_path, self.repos.rev_db(base_rev)
520
523
| Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed Jul 5 01:52:06 2023 | http://epydoc.sourceforge.net |