-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathapp.py
76 lines (62 loc) · 1.96 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
from datetime import datetime
from typing import List
import pandas as pd
from startrack.config import GITHUB_TOKEN_ENV
from startrack.core import (
list_organization_repositories,
RepositoryType,
RepositoryData,
to_dataframe
)
# Token looked up in the environment variable whose name is GITHUB_TOKEN_ENV;
# the value is None when that variable is not set.
GITHUB_TOKEN = os.environ.get(GITHUB_TOKEN_ENV)

# Organizations whose repositories are listed by the script below.
ORGANIZATION_NAMES = [
    "roboflow",
    "autodistill",
]
def save_to_csv(df: pd.DataFrame, directory: str, filename: str) -> None:
    """
    Save a DataFrame to a CSV file in the specified directory.

    The directory (including missing parents) is created when it does not
    exist yet. An empty `directory` means "no directory component": nothing
    is created and `filename` is used as the path on its own.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        directory (str): The directory where the CSV file will be saved.
        filename (str): The name of the CSV file.
    """
    if directory:
        # exist_ok=True avoids the check-then-create race of testing
        # os.path.exists() first; os.makedirs("") would raise, hence the guard.
        os.makedirs(directory, exist_ok=True)
    file_path = os.path.join(directory, filename)
    # pandas' default settings are kept, so the index is written as well.
    df.to_csv(file_path)
def get_all_organization_repositories(
    github_token: str,
    organization_name: str,
    repository_type: RepositoryType
) -> List:
    """
    Collect the repositories of an organization across all result pages.

    Pages are requested from `list_organization_repositories`, starting at
    page 1 and advancing by one, until a request yields an empty (falsy)
    result.

    Args:
        github_token (str): Token forwarded to the listing call.
        organization_name (str): Organization forwarded to the listing call.
        repository_type (RepositoryType): Repository type forwarded to the
            listing call.

    Returns:
        List: The items of every non-empty page, in page order.
    """
    collected = []
    page_number = 0
    while True:
        page_number += 1
        batch = list_organization_repositories(
            github_token=github_token,
            organization_name=organization_name,
            repository_type=repository_type,
            page=page_number)
        if not batch:
            # An empty page marks the end of the listing.
            return collected
        collected += batch
# Gather the raw repository listings of every configured organization.
all_repositories_json = []
for organization_name in ORGANIZATION_NAMES:
    repositories_json = get_all_organization_repositories(
        github_token=GITHUB_TOKEN,
        organization_name=organization_name,
        repository_type=RepositoryType.PUBLIC,
    )
    all_repositories_json += repositories_json

# Turn each raw entry into a RepositoryData object.
repositories = list(map(RepositoryData.from_json, all_repositories_json))

# Index by 'full_name' and transpose, so the 'full_name' values become the
# column labels.
df = to_dataframe(repositories).set_index('full_name').T

# Label the transposed frame with today's date (local time, YYYY-MM-DD).
# NOTE(review): assigning a one-element index assumes exactly one row remains
# after the transpose — confirm against to_dataframe's columns.
current_date = datetime.now().strftime("%Y-%m-%d")
df.index = [current_date]

save_to_csv(df=df, directory='data', filename='data.csv')