-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathgit.py
276 lines (222 loc) · 8.8 KB
/
git.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import os
import subprocess
import tempfile
class GitCommit(object):
    """
    Plain record describing one Git commit: its SHA, the author's name
    and the commit message.
    """

    def __init__(self, sha, author, message):
        # The three values are stored exactly as given; nothing is validated.
        self.sha, self.author, self.message = sha, author, message
class GitBlameLine(object):
    """
    Plain record holding what blame reported for one line of a file.
    """

    def __init__(self, sha, line, current, original_line, final_line):
        # Commit the line is attributed to, and the line's text.
        self.sha, self.line = sha, line
        # Flag supplied by the caller for this line.
        self.current = current
        # The two line numbers reported in the blame header.
        self.original_line, self.final_line = original_line, final_line
class GitFileHistory(object):
    """
    Responsible for following the history of a single file, moving around
    within that history, and giving information about the state of the file
    at a particular revision or the differences between revisions.

    Most operations are relative to the current commit, which can be changed
    with the prev and next methods and accessed through the current_commit
    property.

    self.commits is ordered as `git log` prints it (newest first), so
    index 0 is the most recent commit that touched the file.
    """

    # ASCII unit/record separators delimit fields and commits in the
    # `git log` output. Unlike newlines they cannot be confused with an
    # empty subject line.
    _FIELD_SEP = '\x1f'
    _RECORD_SEP = '\x1e'
    _LOG_FORMAT = '--pretty=format:%H%x1f%an%x1f%s%x1e'

    def __init__(self, path, start_commit):
        """
        Loads the list of commits that touched path, starting the walk at
        start_commit.

        Raises ValueError if start_commit is not a valid revision or if
        path does not exist in that revision.
        """
        if not verify_revision(start_commit):
            raise ValueError('%s is not a valid commit, branch, tag, etc.' % (
                start_commit,
            ))

        if not verify_file(path, start_commit):
            raise ValueError('"%s" is not tracked by git at commit %s' % (path, start_commit))

        self.path = path

        log_output = self._to_text(self._git_output([
            'log',
            start_commit,
            '--follow',
            self._LOG_FORMAT,
            '--',
            self.path,
        ]))
        self.commits = [GitCommit(*fields) for fields in self._parse_log(log_output)]
        self._index = 0
        self._blame = None
        self._line_mappings = {}

    @staticmethod
    def _git_output(args):
        """
        Runs `git` with the given argument list (no shell involved) and
        returns whatever it wrote to stdout, as raw bytes.

        Failures are deliberately not fatal: a non-zero exit status is
        ignored and a git binary that cannot be started yields empty output.
        Git's own error messages still reach the caller's stderr.
        """
        try:
            proc = subprocess.Popen(['git'] + list(args), stdout=subprocess.PIPE)
        except OSError:
            return b''
        return proc.communicate()[0]

    @staticmethod
    def _to_text(data):
        """
        Converts process output to the native str type. Bytes are decoded
        as UTF-8 with undecodable bytes replaced, so odd file contents never
        raise.
        """
        if isinstance(data, str):
            return data
        return data.decode('utf-8', 'replace')

    @classmethod
    def _parse_log(cls, text):
        """
        Splits `git log` output produced with _LOG_FORMAT into a list of
        (sha, author, message) tuples, in the order git printed them.
        """
        records = []
        for record in text.split(cls._RECORD_SEP):
            # git separates entries with a newline in addition to our
            # explicit record separator.
            record = record.strip('\n')
            if record:
                records.append(tuple(record.split(cls._FIELD_SEP, 2)))
        return records

    @property
    def current_commit(self):
        """The GitCommit the history is currently positioned at."""
        return self.commits[self._index]

    def next(self):
        """
        Moves to the next commit that touched this file, returning False
        if we're already at the last commit that touched the file.
        """
        if self._index <= 0:
            return False

        self._index -= 1
        self._blame = None
        return True

    def prev(self):
        """
        Moves to the previous commit that touched this file, returning False
        if we're already at the first commit that touched the file.
        """
        if self._index >= len(self.commits) - 1:
            return False

        self._index += 1
        self._blame = None
        return True

    def jump_to_commit(self, sha):
        """
        Moves to the given commit SHA, returning False if it doesn't exist
        in this file's history (in which case the position is unchanged).
        """
        for i, commit in enumerate(self.commits):
            if commit.sha == sha:
                self._index = i
                self._blame = None
                return True
        return False

    def blame(self):
        """
        Returns blame information for this file at the current commit as
        a list of GitBlameLine objects.

        The result is cached until the current commit changes. Each line
        keeps its trailing newline; original_line and final_line are the
        strings git printed, not ints.
        """
        # Compare against None so that an empty result is cached as well.
        if self._blame is not None:
            return self._blame

        current_sha = self.current_commit.sha
        output = self._to_text(self._git_output([
            'blame', '-p', current_sha, '--', self.path,
        ]))
        self._blame = [
            GitBlameLine(
                sha=sha,
                line=line,
                current=(sha == current_sha),
                original_line=original_line,
                final_line=final_line,
            )
            for sha, line, original_line, final_line in self._parse_blame(output)
        ]
        return self._blame

    @staticmethod
    def _parse_blame(text):
        """
        Parses `git blame -p` output into a list of
        (sha, line, original_line, final_line) tuples.

        Raises ValueError if a header line has fewer than three fields.
        """
        # Split on '\n' only (keeping the terminator) so that other control
        # characters inside a source line cannot break it apart.
        pieces = text.split('\n')
        tail = pieces.pop()
        lines = [piece + '\n' for piece in pieces]
        if tail:
            lines.append(tail)

        entries = []
        remaining = iter(lines)
        for header in remaining:
            # Header format:
            # commit_sha original_line final_line[ lines_in_group]
            # split() without an argument also drops the trailing newline
            # when the optional fourth field is absent.
            sha, original_line, final_line = header.split()[:3]

            # Skip any additional headers describing the commit; the
            # content line is the first one that starts with a tab.
            content = None
            for line in remaining:
                if line.startswith('\t'):
                    content = line[1:]
                    break
            if content is None:
                # Output ended mid-entry; stop instead of looping forever.
                break

            entries.append((sha, content, original_line, final_line))
        return entries

    def line_mapping(self, start, finish):
        """
        Returns a dict that represents how lines have moved between versions
        of a file. The keys are the line numbers in the version of the file
        at start, the values are where those lines have ended up in the
        version at finish.

        Line numbers are counted from 0, and the mapping normally carries
        one extra entry for the position just past the last line.
        NOTE(review): earlier documentation showed 1-based examples; confirm
        which base the callers expect.

        For example if at start the file is two lines, and at
        finish a new line has been inserted between the two the mapping
        would be:
            {0:0, 1:2, 2:3}

        Deleted lines are represented by None. For example, if at start the
        file were two lines, and the first had been deleted by finish:
            {0:None, 1:0, 2:1}

        Results are cached in both directions.
        """
        # A tuple key cannot collide the way 'a/b' + '/' + 'c' and
        # 'a' + '/' + 'b/c' would for refs that contain slashes.
        key = (start, finish)
        if key in self._line_mappings:
            return self._line_mappings[key]

        forward, backward = self._build_line_mappings(start, finish)
        self._line_mappings[(start, finish)] = forward
        self._line_mappings[(finish, start)] = backward
        return forward

    def _build_line_mappings(self, start, finish):
        """
        Computes the (forward, backward) line mappings between the file's
        contents at the start and finish revisions.
        """
        start_content = self._git_output(['show', '%s:%s' % (start, self.path)])
        finish_content = self._git_output(['show', '%s:%s' % (finish, self.path)])

        groups = self._diff_groups(start_content, finish_content)
        return self._mappings_from_groups(
            groups,
            self._count_lines(start_content),
            self._count_lines(finish_content),
        )

    def _diff_groups(self, old_content, new_content):
        """
        Runs `diff` over the two byte strings and returns its group summary
        as a list of strings such as ['u 8', 'o 3', 'n 4', 'u 1'].

        Raises RuntimeError if diff reports trouble (exit status above 1).
        """
        # We use `diff` to track blocks of added, deleted and unchanged lines
        # in order to build the line mapping.
        # Its `--old/new/unchanged-group-format` flags make this very easy;
        # it generates output like this:
        #   u 8
        #   o 3
        #   n 4
        #   u 1
        # for a diff in which the first 8 lines are unchanged, then 3 deleted,
        # then 4 added and then 1 unchanged.
        #
        # The contents are handed to diff through temporary files, which does
        # not depend on file descriptors being inherited by the child.
        paths = []
        try:
            for content in (old_content, new_content):
                handle = tempfile.NamedTemporaryFile(delete=False)
                paths.append(handle.name)
                try:
                    handle.write(content)
                finally:
                    handle.close()

            proc = subprocess.Popen([
                'diff',
                '--old-group-format=o %dn\n',  # lower case n for old file
                '--new-group-format=n %dN\n',  # upper case N for new file
                '--unchanged-group-format=u %dN\n',  # for unchanged it doesn't matter if n or N
                paths[0],
                paths[1],
            ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = proc.communicate()
        finally:
            for temp_path in paths:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass

        # diff exits with 0 (identical) or 1 (different) on success.
        if proc.returncode not in (0, 1):
            raise RuntimeError('diff failed with status %d: %s' % (
                proc.returncode,
                self._to_text(err).strip(),
            ))

        return [group for group in self._to_text(out).splitlines() if group]

    @staticmethod
    def _count_lines(content):
        """
        Counts the lines in a byte string; a final line without a trailing
        newline still counts as a line.
        """
        count = content.count(b'\n')
        if content and not content.endswith(b'\n'):
            count += 1
        return count

    @staticmethod
    def _mappings_from_groups(groups, start_len, finish_len):
        """
        Turns diff group summaries ('u N', 'o N', 'n N') into the
        (forward, backward) line-number dicts. start_len and finish_len are
        the line counts of the two file versions.
        """
        forward = {}
        backward = {}
        start_ln = 0
        finish_ln = 0

        for group in groups:
            typ, num_lines_str = group.split(' ')
            num_lines = int(num_lines_str)

            if typ == 'u':  # unchanged lines, advance both sides
                for _ in range(num_lines):
                    forward[start_ln] = finish_ln
                    backward[finish_ln] = start_ln
                    start_ln += 1
                    finish_ln += 1
            elif typ == 'o':  # old/deleted lines, advance left side as they only exist there
                for _ in range(num_lines):
                    forward[start_ln] = None
                    start_ln += 1
            elif typ == 'n':  # new/added lines, advance right side as they only exist there
                for _ in range(num_lines):
                    backward[finish_ln] = None
                    finish_ln += 1

        # Make sure the mappings stretch to the end of the files (this adds
        # the entry for the position just past the last line).
        while start_ln <= start_len and finish_ln <= finish_len:
            forward[start_ln] = finish_ln
            backward[finish_ln] = start_ln
            start_ln += 1
            finish_ln += 1

        return forward, backward
def verify_revision(rev):
    """
    Verifies that a revision is valid in the current working directory,
    and returns True or False accordingly.

    Errors are not suppressed, so if the revision is bad or the CWD isn't
    a Git repository then Git's error message will be output.

    The revision is passed to git as a single argument without going
    through a shell, so spaces and shell metacharacters in it are never
    interpreted.
    """
    command = ['git', 'rev-parse', '--verify', '--no-revs', '%s' % (rev,)]
    try:
        status = subprocess.call(command)
    except OSError:
        # git could not be started at all; report the revision as invalid
        # rather than raising.
        return False
    return status == 0
def verify_file(path, commit):
    """
    Reports whether path exists as an object in the given commit.

    Runs `git cat-file -e <commit>:<path>` and returns True when git exits
    with status 0, False otherwise.
    """
    object_name = '%s:%s' % (commit, path)
    status = subprocess.call(['git', 'cat-file', '-e', object_name])
    return status == 0