-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCreate Dataframes
133 lines (96 loc) · 4.56 KB
/
Create Dataframes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
## Installing Packages
# Install only those required packages that are not available yet, so that
# re-running the script does not re-download everything each time.
# Notes:
# - "lapply" is a base R function, not a package; trying to install or attach
#   it fails, so it is not listed here.
# - "grid" ships with R itself and therefore only needs to be attached.
# - "rjson" is needed because the JSON files are parsed with rjson::fromJSON().
required_packages <- c(
  "dplyr",        # Offers functions for data manipulation and transformation
  "ggplot2",      # Enables data visualization using the Grammar of Graphics
  "gridExtra",    # Extends grid-based layout with additional functionality
  "jsonlite",     # Offers tools for working with JSON data
  "rjson",        # Parses JSON files into nested lists
  "shiny",        # Provides tools for creating interactive web applications
  "shinyWidgets", # Extends shiny with custom interactive widgets
  "tibble",       # Provides a modern approach to data frames
  "yaml"          # Enables parsing and handling YAML files
)
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
# Load the required packages
# rjson is deliberately not attached: it is called as rjson::fromJSON() so it
# does not clash with jsonlite::fromJSON().
library(dplyr)
library(ggplot2)
library(grid) # Provides grid-based layout functions (part of base R)
library(gridExtra)
library(jsonlite)
library(shiny)
library(shinyWidgets)
library(tibble)
library(yaml)
# Version 1 - one Dataframe
# Load the configuration file using the yaml.load_file() function.
config <- yaml.load_file("config.yaml")
# List the JSON files of the configured directory. config$path is appended
# directly to the working directory (no separator is inserted), so it has to
# start with a path separator.
# `pattern` is a regular expression, not a glob: "\\.json$" matches only names
# that end in ".json".
filenames <- list.files(
  paste0(getwd(), config$path),
  pattern = "\\.json$",
  full.names = TRUE
)
# Parse one JSON file into a nested list.
fromJSONfile <- function(x) rjson::fromJSON(file = x)
# Parse every file; the result holds one nested list per JSON file.
json_files_list_all <- lapply(filenames, fromJSONfile)
# Defining the target names for the analysis. They are used as lookup keys
# into the parsed JSON, so the spelling is kept exactly as it was.
# NOTE(review): "MedicationStatment" may be a misspelling of
# "MedicationStatement" - confirm against the keys in the JSON files.
target_names <- c(
  "Condition",
  "Medication",
  "MedicationAdministration",
  "MedicationStatment",
  "Procedure",
  "Specimen"
)
# We define two functions: get_diagnostics() and get_diagnostics_from_list().
#
# get_diagnostics() collects, for one parsed JSON report, the diagnostics
# messages and their counts of every requested label.
#
# Arguments:
#   df    - one parsed report; the issues of a label are read from
#           df$validation[[label]]$issues, where every issue provides the
#           fields `diagnostics` and `count`.
#   lable - character vector of labels to extract.
#
# Returns a list with one entry per label (named by label, same order as
# `lable`). Every entry is a table with the two columns
# "diagnostics_<label>" and "counts". A label without issues yields a table
# with zero rows.
get_diagnostics <- function(df, lable) {
  results_list <- lapply(lable, function(l) {
    issues <- df$validation[[l]]$issues
    r <- tibble(
      diagnostics = vapply(
        issues,
        function(x) as.character(x$diagnostics),
        character(1),
        USE.NAMES = FALSE
      ),
      counts = vapply(
        issues,
        function(x) as.numeric(x$count),
        numeric(1),
        USE.NAMES = FALSE
      )
    )
    # paste0() keeps the name free of spaces, matching the column names that
    # compare_diagnostics() looks up.
    colnames(r) <- c(paste0("diagnostics_", l), "counts")
    r
  })
  names(results_list) <- lable
  results_list
}
# get_diagnostics_from_list() runs get_diagnostics() on every element of a
# list of parsed reports.
#
# Arguments:
#   ldf   - list of reports; each element is passed to get_diagnostics() as
#           its first argument.
#   lable - labels handed unchanged to every get_diagnostics() call.
#
# Returns a list with one get_diagnostics() result per element of `ldf`.
get_diagnostics_from_list <- function(ldf, lable) {
  lapply(ldf, function(report) get_diagnostics(report, lable))
}
# We apply the get_diagnostics_from_list() function.
diagnistics_counts_list_all <- get_diagnostics_from_list(json_files_list_all, target_names)
# Take the results of the first file, turn their second entry into a data
# frame and rename its two columns ("Häufigkeit" is German for "frequency").
df3 <- diagnistics_counts_list_all[[1]]
df4 <- as.data.frame(df3[[2]])
colnames(df4) <- c("Errorcode", "Häufigkeit")
df4$Errorcode
# Add a new column called "Errorcode Label" to df4, derived from the prefix
# of each error code.
# startsWith() is used instead of comparing a fixed 10-character substring,
# which can only ever equal a prefix that is exactly 10 characters long.
# case_when() takes the first matching condition, so the longer
# "Medication..." prefixes have to be tested before plain "Medication".
df4 <- df4 %>%
  mutate(`Errorcode Label` = case_when(
    startsWith(as.character(Errorcode), "MedicationAdministration") ~ "MedicationAdministration Label",
    startsWith(as.character(Errorcode), "MedicationStatment") ~ "MedicationStatment Label",
    startsWith(as.character(Errorcode), "Medication") ~ "Medication Label",
    startsWith(as.character(Errorcode), "Condition") ~ "Condition Label",
    startsWith(as.character(Errorcode), "Procedure") ~ "Procedure Label",
    startsWith(as.character(Errorcode), "Specimen") ~ "Specimen Label",
    TRUE ~ "Other"
  ))
# Version 2 - Compare Dataframes side by side
#
# compare_diagnostics() compares the diagnostics columns of two tables
# element by element.
#
# Arguments:
#   df1, df2 - data frames (or lists) that contain a column named
#              "diagnostics_<label>" for every requested label.
#   label    - character vector of labels to compare.
#
# Returns a data frame with one logical column per label (named by label);
# a value is TRUE where both inputs hold the same diagnostics text in that
# row. An empty `label` yields an empty data frame.
# Stops with an error if a diagnostics column is missing from either input
# or if the two columns of a label differ in length.
compare_diagnostics <- function(df1, df2, label) {
  comparisons <- lapply(label, function(l) {
    diagnostics_col <- paste0("diagnostics_", l)
    left <- df1[[diagnostics_col]]
    right <- df2[[diagnostics_col]]
    if (is.null(left) || is.null(right)) {
      stop("Column '", diagnostics_col, "' is missing in one of the inputs.")
    }
    # Without this check `==` would silently recycle the shorter column.
    if (length(left) != length(right)) {
      stop("Column '", diagnostics_col, "' differs in length between the inputs.")
    }
    # Compare
    left == right
  })
  names(comparisons) <- label
  # Results: assemble all columns at once instead of growing a data frame
  # with cbind(), which fails when starting from an empty data frame.
  data.frame(comparisons, check.names = FALSE)
}
# Pick the two data sets to put side by side: the last two entries of
# json_files_list_all (the second to last one as dfHalle, the last one as
# dfLeipzig).
# The explicit guard replaces a 1:(n - 1) loop, which counts backwards and
# fails with an unrelated subscript error when fewer than two files exist.
n_files <- length(json_files_list_all)
if (n_files < 2) {
  stop("At least two JSON files are required for a side-by-side comparison.")
}
dfHalle <- json_files_list_all[[n_files - 1]]
dfLeipzig <- json_files_list_all[[n_files]]
# Checking the number of rows in both data frames
# NOTE(review): nrow() returns NULL for plain lists, and the entries of
# json_files_list_all are produced by rjson::fromJSON() - presumably they
# need to be converted to data frames before this check; confirm.
if (!(nrow(dfHalle) > 0 && nrow(dfLeipzig) > 0)) {
  stop("One or both data frames have zero rows.")
}
if (nrow(dfHalle) != nrow(dfLeipzig)) {
  stop("The data frames have different row counts.")
}
# Combine the Dataframes column-wise and print the result
combined_df <- cbind(dfHalle, dfLeipzig)
combined_df