Analysis.R

# Set up ----
## Install here and pacman packages if they are not installed
if (any((c("here", "pacman") %in% 
         rownames(installed.packages())) == FALSE)) {
  install.packages(c("here", "pacman")[!(c("here", "pacman") %in%
                                           rownames(installed.packages()))])
}

# Define some general custom functions to use across scripts ----
source(here::here("scripts"
                  , "general_functions.R"))

# Packages ----
# Install the tidyverse package if it is not yet installed
install_new_packs("tidyverse")

# Vector holding the list of packages that will be used across scripts
analysis_packs <- c("tictoc", "beepr", "stringr", "here", "tibble", "tidyr"
                    , "dplyr")

# install and/or load packages
load_my_packs(analysis_packs)

# A vector to contain the names of the objects generated by the master script
## Any object named within this vector will be preserved in the environment
## after the environment cleaning last steps of each script
analysis_objects <- c(ls(), "analysis_objects")

# Process samples data ----
# This scripts prepare the samples metadata for its usage by other scripts
source(here("scripts", "samples_data.R"))

# RT-qPCR analysis ----
### Import qPCR raw data ####
# This script imports the raw qPCR data and prepares it for the calculation of
# the relative gene expression (CNRQs)
source(here("scripts", "import-qPCR-data.R"))
gc()

### Calculate CNRQs scaled to group ####
# This script calculates the relative gene expression (CNRQs)
source(here("scripts", "CNRQs.R"))
gc()

### Models to analyze differences in relative gene expression ####
#### It takes a lot of time to run this script, due to the bootstrapping
##### Define number of bootstrap re-samples
bootstrap_reps <- 10e3
source(here("scripts", "rel-exp_boot-models.R")); beep()
Sys.sleep(1)

### Plots for inference on the gene expression differences ####
source(here("scripts", "rel-exp_plots.R")); beep()
Sys.sleep(1)

# GCMS analysis ----
## Define GCMS runs batches
gcms_batch_list <- list.dirs(path = here("data", "raw","gcms-integration")
                             , recursive = F) |>
  str_split("/", simplify = T) |> 
  as.data.frame() |> 
  pull(last_col())

analysis_objects <- c(analysis_objects
                      , "gcms_batch_list")

### Import GCMS integration data ####
## Iterate through GCMS batches
for (gcms_batch in gcms_batch_list) {
  ## The script imports the integration data for all the samples of the batch
  ## and prepares it for the automatic alignment
  source(here("scripts", "import_GCMSdata.R"))
  gc()
}; beep()

### Align GCMS data ####
#### Data frame defining the criteria for the automatic alignment of the GCMS
#### integration data of the samples of each batch
alignment_criteria <- {gcms_batch_list |> 
  cbind(
    tribble(
      # Threshold for partial peak alignment step.
      # Shortest distance between neighboring peaks within samples
      ~partial_alignment_threshold,
      
      # Keep the linear shift small. The third of the partial alignment 
      # criteria is just an arbitrary value. The idea is to avoid that this 
      # shift compromises the partial alignment or rows merging steps
      ~linear_shift_criteria,
      
      # Threshold for merging rows step.
      # Skimming the chromatograms no compound seemed to have a separation
      # over "n" min between samples.
      # Round "n" down to establish the threshold (e.g. 0.152 -> 0.15)
      ~row_merging_threshold,
      
      #  Has to be changed -corresponds to the first batch of plasticity samples
      # batch 06032023: 
      # Shortest distance between neighboring peaks within samples
      # , 1 / 3 of the shortest distance between neighboring peaks within samples
      # , 34.320 - 34.332
      0.04, 0.04 / 3, 0.10,
      
      # batch 20022023: 
      # Shortest distance between neighboring peaks within samples
      # , 1 / 3 of the shortest distance between neighboring peaks within samples
      # , 34.268 - 34.332 
      0.04, 0.04 / 3, 0.06
    ) 
  )}

analysis_objects <- c(analysis_objects
                      , "alignment_criteria")

### Iterate through batches
for (gcms_batch in gcms_batch_list) {
  # Define the alignment criteria for the corresponding batch
  partial_alignment_threshold <- 
    alignment_criteria["partial_alignment_threshold"][
      alignment_criteria[
        "gcms_batch_list"] == gcms_batch]
  
  linear_shift_criteria <- 
    alignment_criteria["linear_shift_criteria"][
      alignment_criteria[
        "gcms_batch_list"] == gcms_batch]
  
  row_merging_threshold <- 
    alignment_criteria["row_merging_threshold"][
      alignment_criteria[
        "gcms_batch_list"] == gcms_batch]
  
  # The script performs the alignment(s) of the GCMS data of the batch
  source(here("Scripts", "align_MS-data.R"))
  gc()
}; beep()
Sys.sleep(1)

## Correct alignment ----
### Iterate through GCMS batches
for (gcms_batch in gcms_batch_list) {
  # Script to correct alignments
  source(here("Scripts", "correct-GCalignment.R"))
  gc()
}; beep()
Sys.sleep(1)

## Generate the group tables ----
### Iterate through GCMS batches
for (gcms_batch in gcms_batch_list) {
  # Script to prepare the group alignments for assembling the master table
  source(here("Scripts", "group-tables.R"))
  gc()
}; beep()
Sys.sleep(1)

## Build the master table ----
# Script to assemble the final CHC composition data set (master table)
source(here("Scripts", "mastertable.R"))
gc()
beep()
Sys.sleep(1)

## Univariate CHC stats ----
# Script to perform analyses on the abundance of hydrocarbon classes and mean 
# chain length
source(here("scripts", "univariate_chcs.R")); beep()
Sys.sleep(1)

# Correlation of gene expression and CHC composition ----
# Script to make the correlograms for the CHC composition vs gene expression
source(here("scripts", "correlograms.R")); beep()
Sys.sleep(1)

# NMDS ----
# Script to perform the Non-metric multidimensional Scaling (NMDS)
source(here("scripts", "nmds.R")); beep()
Sys.sleep(1)

# END ----
## Report session information
capture.output(sessionInfo()
               , file = here("output"
                             ,"SInf_analysis-master-script.txt"))

beep(8)