Skip to content

Commit 353de08

Browse files
committed
Add git history file overview treemap
1 parent b13ff54 commit 353de08

File tree

3 files changed

+243
-0
lines changed

3 files changed

+243
-0
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// List git file directories and the number of files they contain
2+
3+
MATCH (git_file:File&Git&!Repository)
4+
WITH git_file.relativePath AS gitFileName
5+
,reverse(split(reverse(git_file.relativePath),'/')[0]) AS gitFileNameWithoutPath
6+
WITH *
7+
,rtrim(split(gitFileName, gitFileNameWithoutPath)[0], '/') AS gitFilePath
8+
,count(DISTINCT gitFileName) AS numberOfContainedFiles
9+
RETURN gitFilePath
10+
,last(split(gitFilePath, '/')) AS lastPathElement
11+
,size(split(gitFilePath, '/')) AS pathLength
12+
,count(DISTINCT gitFileName) AS numberOfContainedFiles
13+
// Debugging
14+
// ,gitFileName
15+
// ,gitFileNameWithoutPath
16+
// ,gitFileNameWithoutPath2
17+
ORDER BY gitFilePath ASC

jupyter/GitHistoryJava.ipynb

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
{
2+
"cells": [
3+
{
4+
"attachments": {},
5+
"cell_type": "markdown",
6+
"id": "2f0eabc4",
7+
"metadata": {},
8+
"source": [
9+
"# Git History Charts for Java with Neo4j\n",
10+
"<br> \n",
11+
"\n",
12+
"### References\n",
13+
"- [Analyze java package metrics in a graph database](https://joht.github.io/johtizen/data/2023/04/21/java-package-metrics-analysis.html)\n",
14+
"- [Calculate metrics](https://101.jqassistant.org/calculate-metrics/index.html)\n",
15+
"- [Neo4j Python Driver](https://neo4j.com/docs/api/python-driver/current)"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 25,
21+
"id": "4191f259",
22+
"metadata": {},
23+
"outputs": [],
24+
"source": [
25+
"import os\n",
26+
"import pandas as pd\n",
27+
"from neo4j import GraphDatabase"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 26,
33+
"id": "c57aadf9",
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"import matplotlib.pyplot as plot\n",
38+
"import squarify"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 27,
44+
"id": "1c5dab37",
45+
"metadata": {},
46+
"outputs": [],
47+
"source": [
48+
"# Please set the environment variable \"NEO4J_INITIAL_PASSWORD\" in your shell \n",
49+
"# before starting jupyter notebook to provide the password for the user \"neo4j\". \n",
50+
"# It is not recommended to hardcode the password into jupyter notebook for security reasons.\n",
51+
"\n",
52+
"driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\")))\n",
53+
"driver.verify_connectivity()"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": 28,
59+
"id": "c1db254b",
60+
"metadata": {},
61+
"outputs": [],
62+
"source": [
63+
"def get_cypher_query_from_file(cypherFileName):\n",
64+
" with open(cypherFileName) as file:\n",
65+
" return ' '.join(file.readlines())"
66+
]
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": 29,
71+
"id": "59310f6f",
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"def query_cypher_to_data_frame(filename : str, limit: int = 10_000):\n",
76+
" cypher_query_template = \"{query}\\nLIMIT {row_limit}\"\n",
77+
" cypher_query = get_cypher_query_from_file(filename)\n",
78+
" cypher_query = cypher_query_template.format(query = cypher_query, row_limit = limit)\n",
79+
" records, summary, keys = driver.execute_query(cypher_query)\n",
80+
" return pd.DataFrame([r.values() for r in records], columns=keys)"
81+
]
82+
},
83+
{
84+
"cell_type": "code",
85+
"execution_count": 30,
86+
"id": "c09da482",
87+
"metadata": {},
88+
"outputs": [],
89+
"source": [
90+
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = 10_000):\n",
91+
" \"\"\"\n",
92+
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
93+
" If all given file names result in empty results, the last (empty) result will be returned.\n",
94+
" By additionally specifying \"limit=\" the \"LIMIT\" keyword will be appended to the query so that only the first results get returned.\n",
95+
" \"\"\" \n",
96+
" result=pd.DataFrame()\n",
97+
" for filename in filenames:\n",
98+
" result=query_cypher_to_data_frame(filename, limit)\n",
99+
" if not result.empty:\n",
100+
" return result\n",
101+
" return result"
102+
]
103+
},
104+
{
105+
"cell_type": "code",
106+
"execution_count": 31,
107+
"id": "a56670c9",
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"#The following cell uses the built-in %%html \"magic\" to override the CSS style for tables to a much smaller size.\n",
112+
"#This is especially needed for PDF export of tables with multiple columns."
113+
]
114+
},
115+
{
116+
"cell_type": "code",
117+
"execution_count": null,
118+
"id": "006b9dc8",
119+
"metadata": {},
120+
"outputs": [],
121+
"source": [
122+
"%%html\n",
123+
"<style>\n",
124+
"/* CSS style for smaller dataframe tables. */\n",
125+
".dataframe th {\n",
126+
" font-size: 8px;\n",
127+
"}\n",
128+
".dataframe td {\n",
129+
" font-size: 8px;\n",
130+
"}\n",
131+
"</style>"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 33,
137+
"id": "6323e85e",
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"# Pandas DataFrame Display Configuration\n",
142+
"pd.set_option('display.max_colwidth', 300)"
143+
]
144+
},
145+
{
146+
"attachments": {},
147+
"cell_type": "markdown",
148+
"id": "91d80bf7",
149+
"metadata": {},
150+
"source": [
151+
"## Git History\n",
152+
"\n"
153+
]
154+
},
155+
{
156+
"cell_type": "code",
157+
"execution_count": 38,
158+
"id": "dc682db6",
159+
"metadata": {},
160+
"outputs": [],
161+
"source": [
162+
"git_file_directories = query_cypher_to_data_frame(\"../cypher/GitLog/List_git_files_directories.cypher\", limit=50)"
163+
]
164+
},
165+
{
166+
"cell_type": "code",
167+
"execution_count": null,
168+
"id": "0d5d23ab",
169+
"metadata": {},
170+
"outputs": [],
171+
"source": [
172+
"git_file_directories"
173+
]
174+
},
175+
{
176+
"cell_type": "code",
177+
"execution_count": null,
178+
"id": "f2a7c8ea",
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"figure, axis = plot.subplots(figsize=(20,20))\n",
183+
"axis.set_axis_off()\n",
184+
"axis.set_title('Directories with the number of contained files')\n",
185+
"squarify.plot(\n",
186+
" sizes=git_file_directories.numberOfContainedFiles, \n",
187+
" label=git_file_directories.lastPathElement,\n",
188+
" text_kwargs={'color':'white', 'fontsize':9, 'fontweight':'bold'},\n",
189+
" edgecolor=\"white\", \n",
190+
" linewidth=4,\n",
191+
" ax=axis\n",
192+
")\n",
193+
"plot.show()"
194+
]
195+
}
196+
],
197+
"metadata": {
198+
"authors": [
199+
{
200+
"name": "JohT"
201+
}
202+
],
203+
"code_graph_analysis_pipeline_data_validation": "ValidateJavaInternalDependencies",
204+
"kernelspec": {
205+
"display_name": "codegraph",
206+
"language": "python",
207+
"name": "python3"
208+
},
209+
"language_info": {
210+
"codemirror_mode": {
211+
"name": "ipython",
212+
"version": 3
213+
},
214+
"file_extension": ".py",
215+
"mimetype": "text/x-python",
216+
"name": "python",
217+
"nbconvert_exporter": "python",
218+
"pygments_lexer": "ipython3",
219+
"version": "3.11.9"
220+
},
221+
"title": "Git History Charts for Java with Neo4j"
222+
},
223+
"nbformat": 4,
224+
"nbformat_minor": 5
225+
}

jupyter/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@ dependencies:
1515
- opentsne=1.0.* # to visualize node embeddings in 2D (t-SNE dimensionality reduction)
1616
- wordcloud=1.9.*
1717
- monotonic=1.*
18+
- squarify=0.4.* # to create tree maps e.g. for git history
1819
- pip:
1920
- neo4j==5.23.*

0 commit comments

Comments
 (0)