-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathstats_cli.py
221 lines (182 loc) · 9.11 KB
/
stats_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import subprocess
import yaml
import tempfile
import os
from colorama import Fore, Style, init
# colorama must be initialised for colour codes to work on Windows (it does
# nothing on Linux/macOS).
init(autoreset=True)

# History sample points; each string is passed to `git rev-list --before=...`.
AGES = [
    "now",
    "1 month ago",
    "2 months ago",
    "3 months ago",
    "4 months ago",
    "5 months ago",
    "6 months ago",
]

# Local checkout of the main Git repository that holds the mirror config.
REPO_PATH = "/Users/fprior/Development/GitFarm/workplace/fprior/update-mirror-3/update-mirror/src/AWSDocsSdkExamplesPublic"

# YAML file (relative to the repository root) to extract from each commit.
FILE_PATH = "tools/update_mirror/config.yaml"
def run_git_command(command, cwd=None):
    """
    Run a command and return its standard output.

    Despite the name, any command can be run; the rest of this script also
    uses it to launch a Python module.

    Args:
        command (str or sequence of str): The command to run. A string is
            executed through the shell; a sequence is executed directly as an
            argument vector, without a shell.
        cwd (str, optional): The directory to run the command in. Defaults to None.

    Returns:
        str: The stripped standard output of the command (possibly an empty
        string), or None if the command could not be started or exited with a
        non-zero status.
    """
    # Only plain strings need the shell; argument vectors are run as-is.
    use_shell = isinstance(command, str)
    try:
        result = subprocess.run(
            command,
            cwd=cwd,
            text=True,
            capture_output=True,
            shell=use_shell,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Covers a missing executable or cwd, bad arguments and output that
        # cannot be decoded as text.
        print(f"{Fore.RED}Error running command: {command}\n{Fore.RED}Error: {e}")
        return None

    if result.returncode != 0:
        # Report stderr explicitly so the real cause of the failure is shown
        # rather than just the exit status.
        details = (result.stderr or "").strip()
        print(
            f"{Fore.RED}Error running command: {command}\n"
            f"{Fore.RED}Error: exit status {result.returncode}: {details}"
        )
        return None

    return result.stdout.strip()
def clone_or_reuse_repo(repo_url, branch, clone_dir):
    """
    Clone a Git repository if it doesn't already exist, or reuse the existing
    clone, then check out the requested branch.

    Args:
        repo_url (str): The URL of the repository to clone.
        branch (str): The branch to check out. If empty or None, no checkout
            is attempted and the clone stays on its current branch.
        clone_dir (str): The directory to clone the repository into.
    """
    if os.path.exists(clone_dir):
        print(f"{Fore.YELLOW}Repository {repo_url} already cloned in {clone_dir}. Reusing existing clone.")
    else:
        print(f"{Fore.GREEN}Cloning repository {repo_url} into {clone_dir}...")
        # Quote both arguments so paths containing spaces survive the shell.
        clone_cmd = f'git clone "{repo_url}" "{clone_dir}"'
        # `git clone` normally prints nothing on stdout, so success yields an
        # empty string; only None signals failure.
        if run_git_command(clone_cmd) is None:
            print(f"{Fore.RED}Failed to clone {repo_url}; skipping checkout of {branch}.")
            return

    if not branch:
        print(f"{Fore.YELLOW}No branch specified for {repo_url}; staying on the current branch.")
        return

    # Checkout the correct branch
    print(f"{Fore.CYAN}Checking out branch {branch} in {clone_dir}...")
    checkout_cmd = f'git checkout "{branch}"'
    run_git_command(checkout_cmd, cwd=clone_dir)
def get_commit_hash_for_age(repo_dir, age):
    """
    Look up the most recent commit reachable from HEAD that is older than `age`.

    Args:
        repo_dir (str): The path to the Git repository.
        age (str): A date expression understood by Git (e.g., '1 month ago').

    Returns:
        str: The matching commit hash, or None when the lookup failed or
        produced no commit.
    """
    rev_list_cmd = f'git rev-list -1 --before="{age}" HEAD'
    found_hash = run_git_command(rev_list_cmd, cwd=repo_dir)

    # Both a failed command (None) and an empty result count as "not found".
    if not found_hash:
        print(f"{Fore.RED}Failed to find commit hash for {age} in {repo_dir}")
        return None

    return found_hash
def checkout_commit(repo_dir, commit_hash):
    """
    Force the working tree of a repository onto a given commit.

    Args:
        repo_dir (str): The path to the Git repository.
        commit_hash (str): The commit hash to checkout.
    """
    print(f"{Fore.CYAN}Checking out commit {commit_hash} in {repo_dir}")
    # --force discards local modifications that would otherwise block the checkout.
    run_git_command(f"git checkout --force {commit_hash}", cwd=repo_dir)
def run_commands_in_repo(repo_dir, commit_hash, age):
    """
    Check out a commit, print its details, and run the stats module on the repository.

    Args:
        repo_dir (str): The path to the repository.
        commit_hash (str): The commit hash to run the commands in.
        age (str): The age of the commit being processed (used in messages only).
    """
    # Checkout repo at the specific commit
    checkout_commit(repo_dir, commit_hash)

    # Retrieve commit details as "<hash>|<author name>|<ISO author date>"
    log_cmd = f'git log -n 1 {commit_hash} --pretty=format:"%H|%an|%aI"'
    log_output = run_git_command(log_cmd, cwd=repo_dir)
    if not log_output:
        print(f"{Fore.RED}No commit found for {age} in {repo_dir}")
        return

    fields = log_output.split('|')
    if len(fields) < 3:
        print(f"{Fore.RED}Unexpected git log output for {age} in {repo_dir}: {log_output}")
        return

    # The hash and the date never contain '|', but an author name may, so
    # take the first and last fields and rejoin whatever lies between them.
    commit_hash = fields[0]
    commit_date = fields[-1]
    author_name = '|'.join(fields[1:-1])
    print(f"{Fore.MAGENTA}Commit for {age}: {commit_hash}, Author: {author_name}, Date: {commit_date}")

    # Run the Python command on the repo
    python_cmd = f'python3 -m aws_doc_sdk_examples_tools.stats "{repo_dir}"'
    print(f"{Fore.CYAN}Running stats command for repository: {repo_dir}")
    output = run_git_command(python_cmd, cwd=repo_dir)
    if output:
        print(output)
    print("###########################")
def get_file_from_commits_and_clone(repo_path, file_path, ages):
    """
    Extract file contents from specific commits and clone repositories for each mirror.

    For every age, the YAML file is read from the matching commit of the main
    repository, its 'mirrors' section is recorded, and for each mirror the
    mirror repository is cloned (once per URL) into a temporary directory and
    the stats command is run at that repository's commit for the same age.
    The temporary clones are removed before the function returns.

    Args:
        repo_path (str): Path to the main repository.
        file_path (str): Path to the YAML file within the repository.
        ages (list): List of age ranges to retrieve commit hashes for.

    Returns:
        dict: A dictionary mapping each age to the mirrors section of the YAML
        file. Ages whose commit or file could not be read, or whose YAML is
        invalid, are omitted; a missing or malformed mirrors section is
        recorded as an empty dict.
    """
    age_content_dict = {}
    cloned_repos = {}  # To track cloned repositories and their directories

    # Create a tmp directory for the clones
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Fetch the config file from the main repository for each age
        for age in ages:
            print(f"{Style.BRIGHT}{Fore.BLUE}#############################################################")
            print(f"{Style.BRIGHT}{Fore.BLUE}######################## {age.upper()} ##############################")
            print(f"{Style.BRIGHT}{Fore.BLUE}#############################################################")

            # Get the commit hash for the main repository
            main_commit_hash = get_commit_hash_for_age(repo_path, age)
            if not main_commit_hash:
                print(f"{Fore.RED}Skipping {age} because commit hash could not be retrieved for the main repository.")
                continue

            # Get the YAML configuration from that commit
            file_content = run_git_command(f"git show {main_commit_hash}:{file_path}", cwd=repo_path)
            if not file_content:
                print(f"{Fore.RED}No file content found for commit {main_commit_hash} at {age}")
                continue

            try:
                # Parse the YAML content
                yaml_content = yaml.safe_load(file_content)
            except yaml.YAMLError as exc:
                print(f"{Fore.RED}Error parsing YAML file for {age}: {exc}")
                continue

            # Extract the mirrors section. The document may be empty or not a
            # mapping, and 'mirrors' may be present but null.
            mirrors = {}
            if isinstance(yaml_content, dict):
                mirrors = yaml_content.get('mirrors') or {}
            if not isinstance(mirrors, dict):
                print(f"{Fore.RED}Ignoring malformed 'mirrors' section for {age}")
                mirrors = {}
            # Keep only entries that are mappings so later .get() calls are safe.
            mirrors = {name: info for name, info in mirrors.items() if isinstance(info, dict)}
            age_content_dict[age] = mirrors

            # Clone or reuse repos for each mirror
            for mirror_name, mirror_info in mirrors.items():
                repo_url = mirror_info.get('git_mirror')
                branch = mirror_info.get('branch')
                dir_name = mirror_info.get('dir')
                if not repo_url or not dir_name:
                    print(f"{Fore.RED}Skipping {mirror_name} in {age}: mirror entry needs 'git_mirror' and 'dir'.")
                    continue

                # Check if repo has already been cloned
                if repo_url not in cloned_repos:
                    # If not, clone the repository to a subfolder in the temp directory
                    clone_dir = os.path.join(tmp_dir, dir_name)
                    clone_or_reuse_repo(repo_url, branch, clone_dir)
                    if not os.path.isdir(clone_dir):
                        # The clone did not produce a directory; do not record
                        # it, so a later age can try again.
                        print(f"{Fore.RED}Skipping {mirror_name} in {age} because {repo_url} could not be cloned.")
                        continue
                    # Mark this repo as cloned
                    cloned_repos[repo_url] = clone_dir
                else:
                    print(f"{Fore.YELLOW}Reusing cloned repository {repo_url} for mirror {mirror_name}.")

                # Get the specific commit hash for this repo at this age
                mirror_dir = cloned_repos[repo_url]
                repo_commit_hash = get_commit_hash_for_age(mirror_dir, age)
                if repo_commit_hash:
                    # Run commands in the cloned repository for the specific commit hash
                    run_commands_in_repo(mirror_dir, repo_commit_hash, age)
                else:
                    print(f"{Fore.RED}Skipping {mirror_name} in {age} due to failure in retrieving commit hash.")

    return age_content_dict
def display_age_content_dict(age_content_dict):
    """
    Print the mirrors recorded for each age, one group per age.

    Args:
        age_content_dict (dict): Maps each age to its mirrors mapping, where
            every mirror is a dict that may hold 'git_mirror', 'branch' and 'dir'.
    """
    shown_fields = ('git_mirror', 'branch', 'dir')

    print(f"{Style.BRIGHT}{Fore.GREEN}File contents grouped by age range:")
    for age in age_content_dict:
        print(f"{Fore.YELLOW}Age: {age}")
        print(f"{Fore.CYAN}Mirrors:")
        for mirror_name, mirror_info in age_content_dict[age].items():
            print(f"{Fore.MAGENTA} - {mirror_name}:")
            # One line per field; absent fields are shown as None.
            for field in shown_fields:
                print(f" {Fore.CYAN}{field}: {mirror_info.get(field)}")
        print("-----------------")
if __name__ == "__main__":
    # Collect the mirrors configuration per age (this also clones the mirror
    # repositories and runs the stats command on each of them) ...
    mirrors_by_age = get_file_from_commits_and_clone(REPO_PATH, FILE_PATH, AGES)
    # ... then print a summary of what was collected.
    display_age_content_dict(mirrors_by_age)