Skip to content

Commit 04a6125

Browse files
committed
Add anomaly detection Markdown summary report
1 parent 4d361d1 commit 04a6125

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed

domains/anomaly-detection/anomalyDetectionCsv.sh

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ anomaly_detection_features() {
6767
# Required Parameters:
6868
# - projection_node_label=...
6969
# Label of the nodes that will be used for the projection. Example: "Package"
70+
# - projection_language=...
71+
# Name of the associated programming language. Default: "Java". Example: "Typescript"
7072
anomaly_detection_queries() {
7173
local nodeLabel
7274
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -92,6 +94,8 @@ anomaly_detection_queries() {
9294
# Required Parameters:
9395
# - projection_node_label=...
9496
# Label of the nodes that will be used for the projection. Example: "Package"
97+
# - projection_language=...
98+
# Name of the associated programming language. Examples: "Java", "Typescript"
9599
anomaly_detection_labels() {
96100
local nodeLabel
97101
nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )
@@ -109,6 +113,63 @@ anomaly_detection_labels() {
109113
# execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeOutlier.cypher" "${@}"
110114
}
111115

116+
# TODO delete if not needed anymore
117+
# # Initialize anomaly detail (Markdown) report.
118+
# # Intended to be run before the first call of "anomaly_detection_detail_report".
119+
# initialize_anomaly_detection_detail_report() {
120+
# archetype_summary_directory=${FULL_REPORT_DIRECTORY}/archetype-summary-${language}-${nodeLabel}
121+
# rm -rf "${archetype_summary_directory}"
122+
# }
123+
124+
# Appends a Markdown table (read from stdin) to a file and
# removes the redundant header + separator rows of the appended table.
#
# Usage:
#   cat newTable.md | append_to_markdown_table myMarkdownFile.md
#
#   append_to_markdown_table myMarkdownFile.md <<'EOF'
#   | Name | Score | Archetype |
#   | --- | --- | --- |
#   | Bar | 0.9 | Something |
#   EOF
#
# Arguments:
#   $1 - Path of the Markdown file to append to. It is created if it doesn't exist.
#
# Behavior:
#   - Keeps the first header row and its following separator row.
#   - Removes all subsequent duplicate header + separator pairs.
#   - Leaves all data rows untouched, even if the same data row occurs more than once.
#
# Returns:
#   Non-zero if appending or rewriting the file fails.
append_to_markdown_table() {
  local file="$1"
  local deduplicated_file="${file}.tmp"

  # Append stdin to the target file
  cat >> "${file}" || return

  # Clean up duplicate headers (header row + --- row).
  # Only a line equal to the first line (header) that is directly followed by
  # a line equal to the second line (separator) is dropped. Comparing whole
  # lines against everything seen so far would also delete repeated data rows.
  if awk '
      NR == 1 { header = $0; print; next }
      NR == 2 { separator = $0; print; next }
      pending {
        pending = 0
        if ($0 == separator) { next }
        print header
      }
      $0 == header { pending = 1; next }
      { print }
      END { if (pending) { print header } }
    ' "${file}" > "${deduplicated_file}"; then
    mv -- "${deduplicated_file}" "${file}"
  else
    # Do not leave a partially written temporary file behind.
    rm -f -- "${deduplicated_file}"
    return 1
  fi
}
149+
150+
# Aggregates all results in a Markdown report.
# Note: The report file is overwritten on every call, so no prior initialization is needed.
#
# Required Parameters:
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_language=...
#   Name of the associated programming language. Examples: "Java", "Typescript"
#
# Globals (read):
# - FULL_REPORT_DIRECTORY: Base directory that the report directory is created in.
# - ANOMALY_DETECTION_LABEL_CYPHER_DIR: Directory containing the summary Cypher query.
#
# Output:
#   Writes "${FULL_REPORT_DIRECTORY}/archetype-summary-<language>-<nodeLabel>/TopAnomaliesByArchetype.md"
anomaly_detection_detail_report() {
  local nodeLabel
  nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )

  local language
  language=$( extractQueryParameter "projection_language" "${@}" )

  echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Creating ${language} ${nodeLabel} anomaly summary Markdown report..."

  # Declared local so that the directory of one language/label combination
  # doesn't leak into the global scope and into subsequent calls.
  local archetype_summary_directory="${FULL_REPORT_DIRECTORY}/archetype-summary-${language}-${nodeLabel}"
  mkdir -p "${archetype_summary_directory}" || return

  # Alternative (currently disabled): collect all results in one shared table.
  # execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeSummary.cypher" "${@}" --output-markdown-table | append_to_markdown_table "${FULL_REPORT_DIRECTORY}/TopAnomaliesByArchetype.md"
  execute_cypher "${ANOMALY_DETECTION_LABEL_CYPHER_DIR}/AnomalyDetectionArchetypeSummary.cypher" "${@}" --output-markdown-table > "${archetype_summary_directory}/TopAnomaliesByArchetype.md"
}
172+
112173
# Run the anomaly detection pipeline.
113174
#
114175
# Required Parameters:
@@ -118,10 +179,13 @@ anomaly_detection_labels() {
118179
# Label of the nodes that will be used for the projection. Example: "Package"
119180
# - projection_weight_property=...
120181
# Name of the node property that contains the dependency weight. Example: "weight"
182+
# - projection_language=...
183+
# Name of the associated programming language. Examples: "Java", "Typescript"
121184
anomaly_detection_csv_reports() {
  # Runs every pipeline step in order, forwards all parameters unchanged
  # and reports the elapsed time of each step.
  local pipeline_step
  for pipeline_step in \
    anomaly_detection_features \
    anomaly_detection_queries \
    anomaly_detection_labels \
    anomaly_detection_detail_report; do
    time "${pipeline_step}" "${@}"
  done
}
126190

127191
# Create report directory
@@ -146,6 +210,9 @@ ALGORITHM_LANGUAGE="projection_language"
146210
COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"
147211
EMBEDDING_PROPERTY="embedding_property=embeddingsFastRandomProjectionTunedForClustering"
148212

213+
#TODO delete if not needed anymore
214+
#initialize_anomaly_detection_detail_report
215+
149216
# -- Java Artifact Node Embeddings -------------------------------
150217

151218
if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight" "${PROJECTION_LANGUAGE}=Java"; then
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Anomaly Detection Labels: Summarizes all labelled archetypes by their anomaly score including their archetype rank. For code units with more than one archetype, the one with the best (numerically lowest) rank is shown. Requires all other labels/*.cypher queries to run first. Variables: projection_language, projection_node_label

 MATCH (codeUnit)
 WHERE $projection_node_label IN labels(codeUnit)
// One row per "anomaly...Rank" property, i.e. one row per archetype the code unit is labelled with.
UNWIND keys(codeUnit) AS codeUnitProperty
  WITH *
 WHERE codeUnitProperty STARTS WITH 'anomaly'
   AND codeUnitProperty ENDS WITH 'Rank'
  WITH *
      ,coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
       // Extracts the archetype name between the "anomaly" prefix and the "Rank" suffix of the property name.
      ,split(split(codeUnitProperty, 'anomaly')[1], 'Rank')[0] AS archetype
      ,codeUnit[codeUnitProperty]                              AS archetypeRank
      ,codeUnit.anomalyScore                                   AS anomalyScore
 ORDER BY codeUnit.anomalyScore DESC, archetypeRank ASC, codeUnitName ASC, archetype ASC
// Resolve the containing unit: Java artifact, Typescript project or (as fallback) directory.
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
  WITH *, artifact.name AS artifactName
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
  WITH *, last(split(projectRoot.absoluteFileName, '/')) AS typescriptProjectName
OPTIONAL MATCH (codeDirectory:File:Directory)-[:CONTAINS]->(codeUnit)
  WITH *, split(replace(codeDirectory.fileName, './', ''), '/')[-2] AS directoryName
  // Uses a new variable name since "WITH *" already carries all previously declared names,
  // and re-declaring one of them in the same WITH is rejected as a duplicate.
  WITH *, coalesce(artifactName, typescriptProjectName, directoryName, "") AS containedIn
RETURN containedIn                           AS `Contained in`
     //$projection_language + ' ' + $projection_node_label AS `Code Unit`
      ,codeUnitName                          AS `Name`
      ,round(anomalyScore, 4, 'HALF_UP')     AS `Score`
      ,collect(archetype)[0]                 AS `Archetype`
      ,collect(archetypeRank)[0]             AS `Archetype Rank`
      ,coalesce(codeUnit.anomalyTopFeature1, "") AS `Top Feature 1`
      ,coalesce(round(codeUnit.anomalyTopFeatureSHAPValue1, 4, 'HALF_UP'), 0.0) AS `Top Feature 1 SHAP`
      ,coalesce(codeUnit.anomalyTopFeature2, "") AS `Top Feature 2`
      ,coalesce(round(codeUnit.anomalyTopFeatureSHAPValue2, 4, 'HALF_UP'), 0.0) AS `Top Feature 2 SHAP`
      ,coalesce(codeUnit.anomalyTopFeature3, "") AS `Top Feature 3`
      ,coalesce(round(codeUnit.anomalyTopFeatureSHAPValue3, 4, 'HALF_UP'), 0.0) AS `Top Feature 3 SHAP`
     //,collect(archetype)[1]     AS secondaryArchetype
     //,collect(archetypeRank)[1] AS secondaryArchetypeRank
// The aggregation above doesn't guarantee any row order, so the result is sorted explicitly.
 ORDER BY `Score` DESC, `Archetype Rank` ASC, `Name` ASC

0 commit comments

Comments
 (0)