-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_commit_history.py
69 lines (66 loc) · 2.59 KB
/
get_commit_history.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import re
import sys
from tqdm import tqdm
from datetime import datetime
from pymongo import MongoClient
# Import the commit log dump of one project into MongoDB.
#
# Usage: get_commit_history.py <project>
#
# Reads /data/repos/<project>.log, splits it into commit records and rebuilds
# two collections in the `os_log` database on the local MongoDB server:
#   <project>_commits       one document per commit
#   <project>_file_history  one document per (commit, changed file) pair

# Marker that precedes every commit record in the log dump.
RECORD_SEPARATOR = 'STARTOFTHECOMMIT:'
# Last occurrence of this marker splits a record into commit info and the
# per-file change lines.
NOTES_MARKER = 'NOTES\n'
# One change line: "<additions> <deletions> <file name>", where either count
# may be "-". Compiled once instead of once per record.
CHANGE_LINE_PATTERN = re.compile(
    r'(?P<addition>(\d+|-))\s*(?P<deletion>(\d+|-))\s*(?P<file_name>.*)'
)


def parse_file_change(line, repo, commit):
    """Turn one change line into a file-history document.

    Args:
        line: a single non-blank change line of a commit record.
        repo: repository name the commit belongs to.
        commit: commit identifier the change belongs to.

    Returns:
        dict with keys repository, file_name, commit, addition, deletion.
        A "-" count is stored as 0.

    Raises:
        ValueError: if the line does not look like a change line.
    """
    match = CHANGE_LINE_PATTERN.search(line)
    if match is None:
        raise ValueError('unrecognised file change line: {!r}'.format(line))
    addition = match.group('addition')
    deletion = match.group('deletion')
    return {
        "repository": repo,
        "file_name": match.group('file_name'),
        "commit": commit,
        "addition": 0 if addition == '-' else int(addition),
        "deletion": 0 if deletion == '-' else int(deletion),
    }


def parse_record(record):
    """Parse the text of one commit record.

    The record is expected to hold the repository name on its first line, ten
    ';'-separated commit fields on its second line, the message body after
    that, then the NOTES marker followed by the change lines.

    Args:
        record: text found between two record separators.

    Returns:
        (commit_document, file_change_documents)

    Raises:
        ValueError: if the record does not have the expected layout.
        OverflowError, OSError: if a timestamp is out of the supported range.
    """
    info, modifies = record.rsplit(NOTES_MARKER, 1)
    repo, ctp, body = info.split('\n', 2)
    (commit, tree, parent_commits,
     author_name, author_email, author_time,
     committer_name, committer_email, committer_time,
     title) = ctp.split(';', 9)
    doc = {
        "repository": repo,
        "commit": commit,
        "tree": tree,
        # NOTE: the misspelled key is kept on purpose so the stored field
        # name does not change.
        "parant_commits": parent_commits.split(' '),
        "author_name": author_name,
        "author_email": author_email,
        # Epoch seconds -> naive datetime expressed in UTC.
        "author_time": datetime.utcfromtimestamp(int(author_time)),
        "committer_name": committer_name,
        "committer_email": committer_email,
        "committer_time": datetime.utcfromtimestamp(int(committer_time)),
        "message": title + '\n' + body
    }
    # The first and last line after the NOTES marker are not change lines;
    # blank lines in between carry no information and are skipped.
    changes = [
        parse_file_change(line, repo, commit)
        for line in modifies.split('\n')[1:-1]
        if line.strip()
    ]
    return doc, changes


def parse_log(content, progress=None):
    """Parse a whole log dump into commit and file-history documents.

    Parsing stops at the first malformed record; everything parsed before it
    is returned. File changes are only kept for records that parsed fully, so
    no file-history document refers to a commit that was not returned.

    Args:
        content: full text of the log dump.
        progress: optional callable wrapping the record list in a progress
            iterator (e.g. tqdm).

    Returns:
        (commit_documents, file_history_documents)
    """
    records = content.split(RECORD_SEPARATOR)[1:]
    docs = []
    file_history = []
    for record in (records if progress is None else progress(records)):
        try:
            doc, changes = parse_record(record)
        except (ValueError, OverflowError, OSError) as exc:
            print(record, file=sys.stderr)
            print('failed to parse record: {}'.format(exc), file=sys.stderr)
            break
        docs.append(doc)
        file_history.extend(changes)
    return docs, file_history


def main(argv=None):
    """Read the project's log dump and rebuild its MongoDB collections.

    Args:
        argv: command-line arguments without the program name; defaults to
            sys.argv[1:]. Only the first argument (the project name) is used.

    Returns:
        Process exit status: 0 on success, 2 when the project name is missing.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print('usage: get_commit_history.py <project>', file=sys.stderr)
        return 2
    project = args[0]

    log_path = '/data/repos/{}.log'.format(project)
    with open(log_path, errors='ignore') as log_file:
        content = log_file.read()

    # Parse before touching the database so a failure while reading or
    # parsing cannot leave the collections dropped and empty.
    docs, file_history = parse_log(content, progress=tqdm)
    print("commits: {}, file changes: {}".format(len(docs), len(file_history)))

    client = MongoClient(host='127.0.0.1', port=27017)
    db = client['os_log']
    targets = (
        ('{}_commits'.format(project), docs),
        ('{}_file_history'.format(project), file_history),
    )
    for collection_name, documents in targets:
        collection = db[collection_name]
        collection.drop()
        # insert_many rejects an empty list, so skip it when nothing parsed.
        if documents:
            collection.insert_many(documents)
    return 0


if __name__ == '__main__':
    sys.exit(main())