# Create Dataframes

## Installing Packages
# In this code chunk, we install the required packages

# Packages this script depends on:
#   dplyr        - functions for data manipulation and transformation
#   ggplot2      - data visualization using the Grammar of Graphics
#   gridExtra    - extends grid-based layout with additional functionality
#   jsonlite     - tools for working with JSON data
#   rjson        - provides rjson::fromJSON(file = ...) used to read the files
#   shiny        - tools for creating interactive web applications
#   shinyWidgets - extends shiny with custom interactive widgets
#   tibble       - a modern approach to data frames
#   yaml         - parsing and handling YAML files
# "grid" ships with R itself and lapply() is a base R function rather than a
# package, so neither of them is installed from CRAN here.
required_packages <- c(
  "dplyr",
  "ggplot2",
  "gridExtra",
  "jsonlite",
  "rjson",
  "shiny",
  "shinyWidgets",
  "tibble",
  "yaml"
)

# Install only the packages that are not available yet, so re-running the
# script does not reinstall everything each time.
is_available <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_available]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Load the required packages
library(dplyr)
library(ggplot2)
library(grid)
library(gridExtra)
library(jsonlite)
library(shiny)
library(shinyWidgets)
library(tibble)
library(yaml)

# Version 1 - one Dataframe

# Load the configuration file. This script reads `config$path`, which is
# appended to the working directory to locate the JSON files.
config <- yaml.load_file("config.yaml")

# Collect the full paths of the JSON files in that directory. `pattern` is a
# regular expression (not a shell glob), so the extension is matched with
# "\\.json$".
filenames <- list.files(
  paste0(getwd(), config$path),
  pattern = "\\.json$",
  full.names = TRUE
)

# Parse every file with rjson; the result holds one parsed object per file.
json_files_list_all <- lapply(filenames, function(x) rjson::fromJSON(file = x))

# Names of the validation sections that are analysed.
# NOTE(review): "MedicationStatment" looks like a misspelling of
# "MedicationStatement"; it is kept unchanged because it is used as a lookup
# key into the parsed JSON - confirm against the data.
target_names <- c(
  "Condition",
  "Medication",
  "MedicationAdministration",
  "MedicationStatment",
  "Procedure",
  "Specimen"
)

# We define two functions: get_diagnostics() and get_diagnostics_from_list().

# Extract the diagnostics messages and their counts for each requested label
# from one parsed report.
#
# Args:
#   df:    A parsed report (nested list). For every label the function reads
#          `df$validation[[<label>]]$issues` and, from each issue, the fields
#          `diagnostics` and `count`.
#          NOTE(review): assumes both fields hold exactly one value per
#          issue - confirm against the JSON schema.
#   lable: Character vector of labels (keys of `df$validation`) to extract.
#
# Returns:
#   A list with one tibble per entry of `lable`, named after the labels and in
#   the same order. Each tibble has the columns `diagnostics_<label>`
#   (character) and `counts` (numeric). A label without issues yields a
#   zero-row tibble with the same two columns.
get_diagnostics <- function(df, lable) {
  results_list <- lapply(lable, function(l) {
    issues <- df$validation[[l]]$issues

    # vapply() returns a zero-length vector for missing/empty issues, so the
    # tibble below is simply empty instead of failing.
    result <- tibble::tibble(
      diagnostics = vapply(
        issues,
        function(x) as.character(x$diagnostics),
        character(1)
      ),
      counts = vapply(issues, function(x) as.numeric(x$count), numeric(1))
    )

    # paste0() keeps the column name free of the space paste() would insert,
    # matching the "diagnostics_<label>" names compare_diagnostics() looks up.
    names(result)[1] <- paste0("diagnostics_", l)
    result
  })

  # Keep each tibble as its own list element (append() would flatten a tibble
  # into its individual columns).
  names(results_list) <- lable
  results_list
}

# Run get_diagnostics() on every element of `ldf`, passing the same label
# vector `lable` each time. The per-element results are returned as a list in
# the order of `ldf`.
get_diagnostics_from_list <- function(ldf, lable) {
  lapply(ldf, get_diagnostics, lable = lable)
}

# Extract the diagnostics for all target names from every parsed file.
diagnistics_counts_list_all <- get_diagnostics_from_list(
  json_files_list_all,
  target_names
)

# Take the second entry of the first file's results, convert it to a data
# frame and label its two columns; the last line shows the error codes.
df3 <- diagnistics_counts_list_all[[1]]
df4 <- setNames(as.data.frame(df3[[2]]), c("Errorcode", "Häufigkeit"))
df4$Errorcode

# Add an "Errorcode Label" column to df4 that classifies each error code by
# the name it starts with; codes matching none of the names become "Other".
#
# startsWith() performs a real prefix match. The earlier comparison of the
# first 10 characters against the full name could not match names longer than
# 10 characters at all, and matched shorter names only on exact equality.
# The longer "Medication..." names are tested before plain "Medication"
# because case_when() uses the first branch that matches.
# NOTE(review): "MedicationStatment" is kept as spelled elsewhere in this
# script; confirm it matches the actual error codes.
df4 <- df4 %>%
  mutate(`Errorcode Label` = {
    code <- as.character(Errorcode)
    case_when(
      startsWith(code, "Condition") ~ "Condition Label",
      startsWith(code, "MedicationAdministration") ~
        "MedicationAdministration Label",
      startsWith(code, "MedicationStatment") ~ "MedicationStatment Label",
      startsWith(code, "Medication") ~ "Medication Label",
      startsWith(code, "Procedure") ~ "Procedure Label",
      startsWith(code, "Specimen") ~ "Specimen Label",
      TRUE ~ "Other"
    )
  })


# Version 2 - Compare Dataframes side by side
# Compare the diagnostics columns of two data frames element by element.
#
# Args:
#   df1, df2: Data frames (or lists) holding one "diagnostics_<label>" column
#             for every entry of `label`.
#   label:    Character vector of labels; each one is prefixed with
#             "diagnostics_" to form the name of the column to compare.
#
# Returns:
#   A data frame with one logical column per label, named after the label:
#   TRUE where df1 and df2 hold equal values, FALSE where they differ and NA
#   where either value is NA. An empty `label` gives an empty data frame.
#
# Stops with an error when a requested column is missing from either input or
# when the two columns differ in length.
compare_diagnostics <- function(df1, df2, label) {
  if (length(label) == 0) {
    return(data.frame())
  }

  comparisons <- lapply(label, function(l) {
    diagnostics_col <- paste0("diagnostics_", l)
    left <- df1[[diagnostics_col]]
    right <- df2[[diagnostics_col]]

    if (is.null(left) || is.null(right)) {
      stop(
        "Column '", diagnostics_col, "' is missing from one of the inputs.",
        call. = FALSE
      )
    }
    # Guard against silent recycling of the shorter column by `==`.
    if (length(left) != length(right)) {
      stop(
        "Column '", diagnostics_col, "' differs in length between the inputs.",
        call. = FALSE
      )
    }

    left == right
  })
  names(comparisons) <- label

  # Build the result in one step instead of cbind()-ing onto an empty data
  # frame, which fails because of the differing row counts.
  do.call(
    data.frame,
    c(comparisons, list(check.names = FALSE, stringsAsFactors = FALSE))
  )
}


# Select the two parsed files to compare: the last two entries of
# json_files_list_all (with exactly two files, the first and the second).
# This is what the earlier loop ended up with after overwriting both variables
# on every iteration; the explicit length check replaces its out-of-bounds
# failure when fewer than two files were read.
n_reports <- length(json_files_list_all)
if (n_reports < 2) {
  stop("At least two JSON files are required for the comparison.")
}
dfHalle <- json_files_list_all[[n_reports - 1]]
dfLeipzig <- json_files_list_all[[n_reports]]


# Check that both objects are non-empty and have matching row counts before
# they are combined.
# NROW() is used instead of nrow() because nrow() returns NULL for objects
# without dimensions (such as the parsed JSON lists), which makes the `if`
# condition itself fail. For data frames NROW() and nrow() agree; for plain
# lists and vectors NROW() is the length.
rows_halle <- NROW(dfHalle)
rows_leipzig <- NROW(dfLeipzig)

if (rows_halle == 0 || rows_leipzig == 0) {
  stop("One or both data frames have zero rows.")
}
if (rows_halle != rows_leipzig) {
  stop("The data frames have different row counts.")
}


# Combine the two objects column-wise with cbind(). The bare `combined_df`
# expression on the last line auto-prints the result when it is evaluated at
# the top level (e.g. interactively).
combined_df <- cbind(dfHalle, dfLeipzig)
combined_df