-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathgit_hyper_blame.py
executable file
·391 lines (309 loc) · 12.2 KB
/
git_hyper_blame.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
#!/usr/bin/env python3
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Wrapper around git blame that ignores certain commits.
"""
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import collections
import logging
import os
import subprocess2
import sys
import git_common
import git_dates
import setup_color
# Set the root logger's threshold to INFO. The logging.debug() calls made by
# hyper_blame() are therefore suppressed unless the level is lowered.
logging.getLogger().setLevel(logging.INFO)
# Name of the default file listing revisions to ignore. main() looks it up
# after changing directory to the repository root, and skips it when
# --no-default-ignores is given.
DEFAULT_IGNORE_FILE_NAME = '.git-blame-ignore-revs'
class Commit(object):
    """Metadata for one commit that appears in blame output.

    Only |commithash| is known at construction time; every other field
    starts out as None. In this file parse_blame() populates the fields with
    setattr(), using the header keys it reads with '-' replaced by '_'.
    """

    def __init__(self, commithash):
        self.commithash = commithash
        # Author identity and timestamp.
        self.author = self.author_mail = None
        self.author_time = self.author_tz = None
        # Committer identity and timestamp.
        self.committer = self.committer_mail = None
        self.committer_time = self.committer_tz = None
        # Remaining per-commit header fields.
        self.summary = None
        self.boundary = None
        # hyper_blame() splits this into '<commit> <filename>'.
        self.previous = None
        self.filename = None

    def __repr__(self):  # pragma: no cover
        return '<Commit %s>' % (self.commithash,)
# One line of blame output:
#   commit:      the Commit the line is attributed to.
#   context:     the text of the line.
#   lineno_then: line number within the file at |commit|.
#   lineno_now:  line number within the file at the blamed revision.
#   modified:    True once hyper_blame() has re-attributed the line past an
#                ignored commit.
BlameLine = collections.namedtuple(
    'BlameLine',
    ['commit', 'context', 'lineno_then', 'lineno_now', 'modified'])
def parse_blame(blameoutput):
    """Parses the output of git blame -p into a stream of BlameLine tuples.

    Each entry in the output starts with a '<hash> <lineno_then> <lineno_now>'
    line, is followed by zero or more 'key value' detail lines, and ends with
    a tab-prefixed line holding the text of the blamed line.

    Args:
      blameoutput: the text produced by git blame -p.

    Yields:
      One BlameLine per entry, with |modified| set to False. Entries naming
      the same commit hash share a single Commit object.
    """
    remaining = iter(blameoutput.split('\n'))
    known_commits = {}

    for header in remaining:
        if not header.strip():
            continue

        # Entry header: commit hash, then the two line numbers.
        fields = header.split()
        commithash = fields[0]
        lineno_then = int(fields[1])
        lineno_now = int(fields[2])

        commit = known_commits.get(commithash)
        if commit is None:
            commit = Commit(commithash)
            known_commits[commithash] = commit

        # Consume detail lines from the same iterator until the tab-prefixed
        # context line shows up. |current| ends up as the last line consumed
        # (or stays the header if the input ends here).
        current = header
        for current in remaining:
            if current.startswith('\t'):
                break
            key, separator, value = current.partition(' ')
            if not separator:
                # A bare key with no value is recorded as a flag.
                value = True
            setattr(commit, key.replace('-', '_'), value)

        # Drop the leading tab.
        yield BlameLine(commit, current[1:], lineno_then, lineno_now, False)
def print_table(outbuf, table, align):
    """Write a 2D rectangular array to |outbuf|, aligning columns with spaces.

    Every cell is padded to the width of the widest cell in its column. The
    cells of a row are encoded as UTF-8, joined with single spaces and written
    as one newline-terminated line.

    Args:
      outbuf: object whose write() method accepts bytes.
      table: sequence of rows, each a sequence of text cells.
      align: string of 'l' and 'r', designating whether each column is left-
          or right-aligned.
    """
    if len(table) == 0:
        return

    # Seed the widths from the first row, then widen with the rest.
    rows = iter(table)
    widths = [len(cell) for cell in next(rows)]
    for row in rows:
        widths = [max(widths[col], len(cell)) for col, cell in enumerate(row)]

    for row in table:
        last_col = len(row) - 1
        encoded = []
        for col, cell in enumerate(row):
            if align[col] == 'r':
                cell = cell.rjust(widths[col])
            elif col < last_col:
                # Do not pad the final column if left-aligned.
                cell = cell.ljust(widths[col])
            encoded.append(cell.encode('utf-8', 'replace'))
        try:
            outbuf.write(b' '.join(encoded) + b'\n')
        except IOError:  # pragma: no cover
            # Can happen on Windows if the pipe is closed early.
            pass
def pretty_print(outbuf, parsedblame, show_filenames=False):
    """Pretty-prints the output of parse_blame as an aligned table.

    Each row holds the first 8 characters of the commit hash, the author, the
    author date, the current line number (suffixed with '*' when the line was
    re-attributed) and the line's text.

    Args:
      outbuf: object whose write() method accepts bytes.
      parsedblame: iterable of BlameLine tuples.
      show_filenames: if True, add the commit's filename as a second column.
    """
    rows = []
    for entry in parsedblame:
        commit = entry.commit
        when = git_dates.timestamp_offset_to_datetime(
            commit.author_time, commit.author_tz)
        marker = '*' if entry.modified else ''

        cells = [commit.commithash[:8]]
        if show_filenames:
            cells.append(commit.filename)
        cells.append('(' + commit.author)
        cells.append(git_dates.datetime_string(when))
        cells.append(str(entry.lineno_now) + marker + ')')
        cells.append(entry.context)
        rows.append(cells)

    # Only the line-number column is right-aligned.
    alignment = 'llllrl' if show_filenames else 'lllrl'
    print_table(outbuf, rows, align=alignment)
def get_parsed_blame(filename, revision='HEAD'):
    """Blames |filename| at |revision| and returns the parsed result.

    Args:
      filename: path of the file to blame.
      revision: git revision to blame at.

    Returns:
      A list of BlameLine tuples, as produced by parse_blame().
    """
    porcelain_output = git_common.blame(
        filename, revision=revision, porcelain=True)
    blame_lines = list(parse_blame(porcelain_output))
    return blame_lines
# Map from (oldrev, newrev) to hunk list, caching the results of git diff as
# computed by cache_diff_hunks(). Only the hunk line numbers are kept, not the
# actual diff contents.
# A hunk list contains (old, new) pairs, where old and new are (start, length)
# pairs.
# A hunk list can also be None (if the diff failed).
# NOTE(review): cache_diff_hunks() as written in this file never stores None;
# confirm whether the None case can still occur.
diff_hunks_cache = {}
def cache_diff_hunks(oldrev, newrev):
    """Returns the hunk list of the diff from |oldrev| to |newrev|, memoized.

    Args:
      oldrev: old side of the diff, passed through to git_common.diff.
      newrev: new side of the diff, passed through to git_common.diff.

    Returns:
      A list with one entry per '@@' hunk header, each of the form
      ((oldstart, oldlength), (newstart, newlength)). The result is stored in
      diff_hunks_cache and reused on later calls with the same arguments.
    """
    def to_start_and_length(token):
        # Chop the '-' or '+', then split 'start[,length]'.
        start, comma, length = token[1:].partition(',')
        # Length is optional (defaults to 1).
        return int(start), (int(length) if comma else 1)

    cache_key = (oldrev, newrev)
    if cache_key in diff_hunks_cache:
        return diff_hunks_cache[cache_key]

    # Use -U0 to get the smallest possible hunks.
    diff_text = git_common.diff(oldrev, newrev, '-U0')

    # A hunk header looks like '@@ -a,b +c,d @@ ...'; fields 1 and 2 are the
    # old and new ranges.
    hunks = [
        tuple(to_start_and_length(token)
              for token in header.split(' ', 3)[1:3])
        for header in diff_text.split('\n')
        if header.startswith('@@')
    ]

    diff_hunks_cache[cache_key] = hunks
    return hunks
def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
                              lineno):
    """Computes the approximate movement of a line number between revisions.

    Consider line |lineno| in |filename| at |revision|. This function computes
    the line number of that line in |newfilename| at |newrevision|. This is
    necessarily approximate.

    Args:
      filename: The file (within the repo) at |revision|.
      newfilename: The name of the same file at |newrevision|.
      revision: A git revision.
      newrevision: Another git revision. Note: Can be ahead or behind
          |revision|.
      lineno: Line number within |filename| at |revision|.

    Returns:
      Line number within |newfilename| at |newrevision|.
    """
    # This doesn't work that well if there are a lot of line changes within
    # the hunk (demonstrated by
    # GitHyperBlameLineMotionTest.testIntraHunkLineMotion). A fuzzy heuristic
    # that takes the text of the new line and tries to find a deleted line
    # within the hunk that mostly matches the new line could help.

    # Use the <revision>:<filename> syntax to diff between two blobs. This is
    # the only way to diff a file that has been renamed.
    old = '%s:%s' % (revision, filename)
    new = '%s:%s' % (newrevision, newfilename)
    hunks = cache_diff_hunks(old, new)

    # Net number of lines added by the hunks that lie entirely before lineno.
    cumulative_offset = 0

    # Find the hunk containing lineno (if any).
    for (oldstart, oldlength), (newstart, newlength) in hunks:
        # A zero-length old range (pure insertion) is reported at the line
        # *before* the insertion point, so the first old line that follows
        # the hunk is oldstart + 1 rather than oldstart.
        first_old = oldstart if oldlength else oldstart + 1

        if lineno < first_old:
            # Gone too far. This hunk starts after lineno, so it must not
            # contribute to the offset.
            break

        if lineno >= first_old + oldlength:
            # Not there yet. The whole hunk precedes lineno, so its change in
            # length shifts every later line.
            cumulative_offset += newlength - oldlength
            continue

        # lineno is in [oldstart, oldstart + oldlength) at revision, which
        # maps to [newstart, newstart + newlength) at newrevision.

        # If newlength == 0, newstart will be the line before the deleted
        # hunk. Since the line must have been deleted, just return that as
        # the nearest line in the new file. Caution: newstart can be 0 in
        # this case.
        if newlength == 0:
            return max(1, newstart)

        newend = newstart + newlength - 1

        # Move lineno based on the amount the entire hunk shifted.
        lineno = lineno + newstart - oldstart
        # Constrain the output within the range [newstart, newend].
        return min(newend, max(newstart, lineno))

    # Wasn't in a hunk. Figure out the line motion based on the difference in
    # length of the hunks that precede lineno.
    return lineno + cumulative_offset
def hyper_blame(outbuf, ignored, filename, revision):
    """Blames |filename| at |revision|, looking past commits in |ignored|.

    A line attributed to an ignored commit is re-attributed to whatever the
    blame of that commit's previous revision says about the approximately
    corresponding line, repeatedly, until a non-ignored commit is found. The
    result is written to |outbuf| via pretty_print().

    Args:
      outbuf: object whose write() method accepts bytes.
      ignored: container of full commit hashes to ignore.
      filename: path of the file to blame.
      revision: git revision to blame at.

    Returns:
      0 on success. If the initial blame raises
      subprocess2.CalledProcessError, its stderr is written to sys.stderr and
      its returncode is returned.
    """
    # Map from (commit, filename) to parsed blame of that file at that
    # commit. The filename is part of the key because the blame depends on
    # it, not only on the commit.
    blame_from = {}
    filename = os.path.normpath(filename)
    # Form used only for filename comparisons below. normcase() leaves paths
    # unchanged except on Windows, where the caller may already have
    # case-folded |filename|.
    comparable_filename = os.path.normcase(filename)

    def cache_blame_from(filename, commithash):
        key = (commithash, filename)
        try:
            return blame_from[key]
        except KeyError:
            parsed = get_parsed_blame(filename, commithash)
            blame_from[key] = parsed
            return parsed

    try:
        parsed = cache_blame_from(filename, git_common.hash_one(revision))
    except subprocess2.CalledProcessError as e:
        sys.stderr.write(e.stderr.decode())
        return e.returncode

    new_parsed = []
    # We don't show filenames in blame output unless we have to.
    show_filenames = False

    for line in parsed:
        # If a line references an ignored commit, blame that commit's parent
        # repeatedly until we find a non-ignored commit.
        while line.commit.commithash in ignored:
            if line.commit.previous is None:
                # You can't ignore the commit that added this file.
                break

            previouscommit, previousfilename = line.commit.previous.split(
                ' ', 1)
            parent_blame = cache_blame_from(previousfilename, previouscommit)

            if not parent_blame:
                # The previous version of this file was empty, therefore, you
                # can't ignore this commit.
                break

            # line.lineno_then is the line number in question at line.commit.
            # We need to translate that line number so that it refers to the
            # position of the same line on previouscommit.
            lineno_previous = approx_lineno_across_revs(
                line.commit.filename, previousfilename,
                line.commit.commithash, previouscommit, line.lineno_then)
            logging.debug('ignore commit %s on line p%d/t%d/n%d',
                          line.commit.commithash, lineno_previous,
                          line.lineno_then, line.lineno_now)

            # Get the line at lineno_previous in the parent commit.
            assert 1 <= lineno_previous <= len(parent_blame)
            newline = parent_blame[lineno_previous - 1]

            # Replace the commit and lineno_then, but not the lineno_now or
            # context.
            line = BlameLine(newline.commit, line.context,
                             newline.lineno_then, line.lineno_now, True)
            logging.debug(' replacing with %r', line)

        # If any line has a different filename to the file's current name,
        # turn on filename display for the entire blame output.
        # Normalize both sides the same way so the comparison is consistent
        # across platforms.
        line_filename = os.path.normcase(
            os.path.normpath(line.commit.filename))
        if line_filename != comparable_filename:
            show_filenames = True

        new_parsed.append(line)

    pretty_print(outbuf, new_parsed, show_filenames=show_filenames)

    return 0
def parse_ignore_file(ignore_file):
    """Yields the revisions listed in |ignore_file|.

    Everything from the first '#' on a line is treated as a comment.
    Surrounding whitespace is stripped and lines left empty are skipped.

    Args:
      ignore_file: iterable of text lines (e.g. an open file).

    Yields:
      Each non-empty revision string, in file order.
    """
    for raw_line in ignore_file:
        revision, _, _ = raw_line.partition('#')
        revision = revision.strip()
        if not revision:
            continue
        yield revision
def main(args, outbuf):
    """Command-line entry point for git hyper-blame.

    Args:
      args: list of command-line arguments (without the program name).
      outbuf: object whose write() method accepts bytes; receives the blame
          output.

    Returns:
      The process exit code: the result of hyper_blame(), or the return code
      of the failed git command if the repository root cannot be determined.
    """
    parser = argparse.ArgumentParser(
        prog='git hyper-blame',
        description='git blame with support for ignoring certain commits.')
    parser.add_argument('-i', metavar='REVISION', action='append',
                        dest='ignored', default=[],
                        help='a revision to ignore')
    parser.add_argument('--ignore-file', metavar='FILE', dest='ignore_file',
                        help='a file containing a list of revisions to ignore')
    parser.add_argument('--no-default-ignores', dest='no_default_ignores',
                        action='store_true',
                        help='Do not ignore commits from '
                             '.git-blame-ignore-revs.')
    parser.add_argument('revision', nargs='?', default='HEAD',
                        metavar='REVISION', help='revision to look at')
    parser.add_argument('filename', metavar='FILE', help='filename to blame')

    args = parser.parse_args(args)

    # Resolve a user-supplied ignore file against the caller's working
    # directory now. After the chdir below, a relative path would otherwise
    # be looked up under the repository root.
    ignore_file_path = None
    if args.ignore_file:
        ignore_file_path = os.path.abspath(args.ignore_file)

    try:
        repo_root = git_common.repo_root()
    except subprocess2.CalledProcessError as e:
        sys.stderr.write(e.stderr.decode())
        return e.returncode

    # Make filename relative to the repository root, and cd to the root dir
    # (so all filenames throughout this script are relative to the root).
    filename = os.path.relpath(args.filename, repo_root)
    os.chdir(repo_root)

    # Normalize filename so we can compare it to other filenames git gives
    # us.
    filename = os.path.normpath(filename)
    filename = os.path.normcase(filename)

    ignored_list = list(args.ignored)
    if (not args.no_default_ignores
            and os.path.exists(DEFAULT_IGNORE_FILE_NAME)):
        with open(DEFAULT_IGNORE_FILE_NAME) as ignore_file:
            ignored_list.extend(parse_ignore_file(ignore_file))

    if ignore_file_path:
        with open(ignore_file_path) as ignore_file:
            ignored_list.extend(parse_ignore_file(ignore_file))

    # Resolve every revision to a full hash; unknown ones are reported and
    # skipped rather than aborting the blame.
    ignored = set()
    for c in ignored_list:
        try:
            ignored.add(git_common.hash_one(c))
        except subprocess2.CalledProcessError:
            # Custom warning string (the message from git-rev-parse is
            # inappropriate).
            sys.stderr.write('warning: unknown revision \'%s\'.\n' % c)

    return hyper_blame(outbuf, ignored, filename, args.revision)
if __name__ == '__main__':  # pragma: no cover
    # Script entry point: set up colour support, then run main() with its
    # output going to the stream provided by git_common.less().
    setup_color.init()
    with git_common.less() as less_input:
        exit_code = main(sys.argv[1:], less_input)
        sys.exit(exit_code)