Skip to content

Commit

Permalink
update files
Browse files Browse the repository at this point in the history
  • Loading branch information
Jamie W committed Jul 5, 2017
1 parent 32cc2f7 commit 668a770
Show file tree
Hide file tree
Showing 16 changed files with 9,355 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## Jupyter Metrics Final Project - Get Data"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"### Pull Down Data from GitHub"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import datetime\n",
"from github import Github"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"g = Github(\"jamieshq\", \"1ca9cfcf3a0f1922b95c582ff5fe5273d4c2a9a6\")\n",
"cutoff = datetime.datetime(2015, 3, 30, 11, 38, 5, 291165)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def get_prs(org, repo):\n",
" prs = []\n",
" print(\"Getting PRs for {}/{}\".format(org.login, repo.name))\n",
" for pr in repo.get_pulls(state=\"all\"):\n",
" if pr.created_at < cutoff:\n",
" continue\n",
" prs.append({\n",
" 'date': pr.created_at,\n",
" 'user': pr.user.login,\n",
" 'number': pr.number,\n",
" 'org': org.login,\n",
" 'repo': repo.name,\n",
" 'is_merged': pr.is_merged(),\n",
" 'state': pr.state\n",
" })\n",
"\n",
" return prs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def get_pr_comments(org, repo):\n",
" comments = []\n",
" print(\"Getting PR comments for {}/{}\".format(org.login, repo.name))\n",
" for pr in repo.get_pulls(state=\"all\"):\n",
" if pr.created_at < cutoff:\n",
" continue\n",
" for comment in pr.get_comments():\n",
" comments.append({\n",
" 'date': comment.created_at,\n",
" 'user': comment.user.login,\n",
" 'number': pr.number,\n",
" 'org': org.login,\n",
" 'repo': repo.name\n",
" })\n",
" return comments"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def get_issues(org, repo):\n",
" issues = []\n",
" print(\"Getting issues for {}/{}\".format(org.login, repo.name))\n",
" for issue in repo.get_issues(state=\"all\"):\n",
" if issue.created_at < cutoff:\n",
" continue\n",
" issues.append({\n",
" 'date': issue.created_at,\n",
" 'user': issue.user.login,\n",
" 'number': issue.number,\n",
" 'org': org.login,\n",
" 'repo': repo.name,\n",
" 'state': issue.state\n",
" })\n",
" \n",
" return issues"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def get_issue_comments(org, repo):\n",
" comments = []\n",
" print(\"Getting issue comments for {}/{}\".format(org.login, repo.name))\n",
" for issue in repo.get_issues(state=\"all\"):\n",
" if issue.created_at < cutoff:\n",
" continue\n",
" for comment in issue.get_comments():\n",
" \n",
" comments.append({\n",
" 'issue_date': issue.created_at,\n",
" 'user': issue.user.login,\n",
" 'number': issue.number,\n",
" 'org': org.login,\n",
" 'repo': repo.name,\n",
" 'comments': comment.body,\n",
" 'comment_creation_date' : comment.created_at\n",
" \n",
" })\n",
" \n",
" return comments"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"#prs = []\n",
"#pr_comments = []\n",
"issues = []\n",
"issue_comments = []\n",
"\n",
"test_orgs = [\"jupyter-resources\"]\n",
"real_orgs = [\"jupyterlab\", \"jupyterhub\", \"jupyter-widgets\", \"jupyter-incubator\"]\n",
"for org_name in real_orgs:\n",
" org = g.get_organization(org_name)\n",
" for repo in org.get_repos():\n",
" #prs.extend(get_prs(org, repo))\n",
" #pr_comments.extend(get_pr_comments(org, repo))\n",
" #issues.extend(get_issues(org, repo))\n",
" issue_comments.extend(get_issue_comments(org, repo))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"issue_comments = pd.DataFrame(issue_comments).set_index(['org', 'repo', 'number', 'issue_date','comment_creation_date', 'comments']).sortlevel()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"issue_comments.to_csv(\"issue_comments.csv\", encoding=\"utf-8\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Converting an epoch timestamp to datetime after querying for ratelimit reset time\n",
"I hit my GitHub ratelimit after reading through the first organization listed (Jupyter). I used the below curl command to learn my ratelimit reset time, which was about an hour after receiving the timeout.\n",
"curl -i https://api.github.com/users/username\n",
"HTTP/1.1 200 OK\n",
"Date: Mon, 01 Jul 2013 17:27:06 GMT\n",
"Status: 200 OK\n",
"X-RateLimit-Limit: 60\n",
"X-RateLimit-Remaining: 56\n",
"X-RateLimit-Reset: 1372700873\n",
"Sources: https://developer.github.com/v3/#rate-limiting http://stackoverflow.com/questions/12400256/python-converting-epoch-time-into-the-datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Sun Apr 2 22:11:54 2017'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import datetime\n",
"datetime.datetime.fromtimestamp(1491196314).strftime('%c')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 668a770

Please sign in to comment.