Skip to content

Commit

Permalink
start visualizations
Browse files Browse the repository at this point in the history
  • Loading branch information
ronikaufman committed Nov 18, 2024
1 parent 870fd50 commit 7005344
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions code/myriad/loam_paper/visualization/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Datasets shared by every plotting function below. Paths are relative to the
# current working directory, so the script must be launched from this folder.
repos_info = pd.read_json("../dataset/repos_info.json")
all_loggedin_contributors = pd.read_json("../dataset/all_loggedin_contributors.json")

# JSON is UTF-8 by specification; be explicit so decoding does not depend on
# the platform's default locale encoding.
with open("../dataset/categories_info.json", "r", encoding="utf-8") as f:
    categories_info = json.load(f)

def avg_contributors_per_category_barplot():
    """Plot the average number of anonymous/logged-in contributors per category.

    For every entry of the module-level ``categories_info`` the repositories
    of ``repos_info`` with a matching ``"category"`` are selected and the
    means of their ``"anonymous_contributors"`` and ``"loggedin_contributors"``
    columns are computed. Categories whose anonymous mean is NaN are left out.
    The result is shown as a grouped (non-stacked) bar chart with a
    logarithmic y axis.

    Prints a message and returns without plotting when no category yields
    any data.
    """
    rows = []
    for cat_info in categories_info:
        category = cat_info["category"]
        repos = repos_info.loc[repos_info["category"] == category]
        avg_anonymous = repos["anonymous_contributors"].mean()
        avg_loggedin = repos["loggedin_contributors"].mean()
        # mean() yields NaN when there is nothing to average (e.g. no repo
        # matched this category); such categories are not plotted.
        if pd.isna(avg_anonymous):
            continue
        rows.append(
            {
                "category": category,
                "avg_anonymous": avg_anonymous,
                "avg_loggedin": avg_loggedin,
            }
        )

    # An empty frame has no "category" column, so set_index() would raise.
    if not rows:
        print("No category has contributor data to plot.")
        return

    data = pd.DataFrame(rows)
    data.set_index("category").plot(kind="bar", stacked=False, color=["steelblue", "red"])
    plt.yscale("log")
    plt.show()

def contributions_years_scatterplot():
    """Show how total contributions relate to repository creation date.

    Draws a seaborn relational plot of the module-level ``repos_info`` with
    ``"created_at"`` on the x axis and ``"total_contributions"`` on a
    log-scaled y axis, coloured by ``"category"``. The x tick labels are
    rotated by 90 degrees before the figure is shown.
    """
    # NOTE(review): seaborn treats ``size`` as a variable mapped to marker
    # size, not as a fixed marker size; a constant 10 was presumably meant
    # as the latter -- confirm before changing.
    relplot_options = {
        "data": repos_info,
        "x": "created_at",
        "y": "total_contributions",
        "hue": "category",
        "size": 10,
    }
    # TODO: legend placement (sns.move_legend) and palette/sizes tuning are
    # not applied yet.
    grid = sns.relplot(**relplot_options)
    grid.set(yscale="log")
    plt.xticks(rotation=90)
    plt.show()

def multi_contributors_barplot():
    """Plot how many non-bot accounts contributed to 1, 2, 3, ... repositories.

    Rows of the module-level ``all_loggedin_contributors`` whose ``"type"``
    is ``"Bot"`` are ignored. For every remaining account the length of its
    ``"contributions"`` entry is taken as the number of repositories it
    contributed to. The chart has one bar per value from 1 up to the largest
    observed count (values with no accounts get a zero bar) and a
    logarithmic y axis.

    Prints a message and returns without plotting when there is no non-bot
    account with at least one contribution.
    """
    humans = all_loggedin_contributors.loc[
        all_loggedin_contributors["type"] != "Bot"
    ]
    repo_counts = pd.Series(
        [len(contributions) for contributions in humans["contributions"]],
        dtype="int64",
    )
    # Accounts with no contributions belong to no bucket; keeping them would
    # require a "0 repos" bar that this chart does not have.
    repo_counts = repo_counts[repo_counts > 0]
    if repo_counts.empty:
        print("No non-bot contributors to plot.")
        return

    # Size the buckets from the true maximum instead of trusting the row
    # order of the input frame.
    buckets = list(range(1, int(repo_counts.max()) + 1))
    accounts_per_bucket = repo_counts.value_counts().reindex(buckets, fill_value=0)
    data = pd.DataFrame(
        {
            "number of repos contributed to": buckets,
            "number of accounts": accounts_per_bucket.to_numpy(),
        }
    )

    sns.barplot(
        data=data,
        x="number of repos contributed to",
        y="number of accounts",
        color="orange",
    )
    plt.yscale("log")
    plt.show()

# Render every figure in turn; each call blocks until its window is closed.
for draw_figure in (
    avg_contributors_per_category_barplot,
    contributions_years_scatterplot,
    multi_contributors_barplot,
):
    draw_figure()

0 comments on commit 7005344

Please sign in to comment.