-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAnalysis.R
186 lines (155 loc) · 6.08 KB
/
Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# Set up ----
## Install here and pacman packages if they are not installed
if (any((c("here", "pacman") %in%
rownames(installed.packages())) == FALSE)) {
install.packages(c("here", "pacman")[!(c("here", "pacman") %in%
rownames(installed.packages()))])
}
# Define some general custom functions to use across scripts ----
source(here::here("scripts"
, "general_functions.R"))
# Packages ----
# Install the tidyverse package if it is not yet installed
install_new_packs("tidyverse")
# Vector holding the list of packages that will be used across scripts
analysis_packs <- c("tictoc", "beepr", "stringr", "here", "tibble", "tidyr"
, "dplyr")
# install and/or load packages
load_my_packs(analysis_packs)
# A vector to contain the names of the objects generated by the master script
## Any object named within this vector will be preserved in the environment
## after the environment cleaning last steps of each script
analysis_objects <- c(ls(), "analysis_objects")
# Process samples data ----
# This scripts prepare the samples metadata for its usage by other scripts
source(here("scripts", "samples_data.R"))
# RT-qPCR analysis ----
### Import qPCR raw data ####
# This script imports the raw qPCR data and prepares it for the calculation of
# the relative gene expression (CNRQs)
source(here("scripts", "import-qPCR-data.R"))
gc()
### Calculate CNRQs scaled to group ####
# This script calculates the relative gene expression (CNRQs)
source(here("scripts", "CNRQs.R"))
gc()
### Models to analyze differences in relative gene expression ####
#### It takes a lot of time to run this script, due to the bootstrapping
##### Define number of bootstrap re-samples
bootstrap_reps <- 10e3
source(here("scripts", "rel-exp_boot-models.R")); beep()
Sys.sleep(1)
### Plots for inference on the gene expression differences ####
source(here("scripts", "rel-exp_plots.R")); beep()
Sys.sleep(1)
# GCMS analysis ----
## Define GCMS runs batches
gcms_batch_list <- list.dirs(path = here("data", "raw","gcms-integration")
, recursive = F) |>
str_split("/", simplify = T) |>
as.data.frame() |>
pull(last_col())
analysis_objects <- c(analysis_objects
, "gcms_batch_list")
### Import GCMS integration data ####
## Iterate through GCMS batches
for (gcms_batch in gcms_batch_list) {
## The script imports the integration data for all the samples of the batch
## and prepares it for the automatic alignment
source(here("scripts", "import_GCMSdata.R"))
gc()
}; beep()
### Align GCMS data ####
#### Data frame defining the criteria for the automatic alignment of the GCMS
#### integration data of the samples of each batch
alignment_criteria <- {gcms_batch_list |>
cbind(
tribble(
# Threshold for partial peak alignment step.
# Shortest distance between neighboring peaks within samples
~partial_alignment_threshold,
# Keep the linear shift small. The third of the partial alignment
# criteria is just an arbitrary value. The idea is to avoid that this
# shift compromises the partial alignment or rows merging steps
~linear_shift_criteria,
# Threshold for merging rows step.
# Skimming the chromatograms no compound seemed to have a separation
# over "n" min between samples.
# Round "n" down to establish the threshold (e.g. 0.152 -> 0.15)
~row_merging_threshold,
# Has to be changed -corresponds to the first batch of plasticity samples
# batch 06032023:
# Shortest distance between neighboring peaks within samples
# , 1 / 3 of the shortest distance between neighboring peaks within samples
# , 34.320 - 34.332
0.04, 0.04 / 3, 0.10,
# batch 20022023:
# Shortest distance between neighboring peaks within samples
# , 1 / 3 of the shortest distance between neighboring peaks within samples
# , 34.268 - 34.332
0.04, 0.04 / 3, 0.06
)
)}
analysis_objects <- c(analysis_objects
, "alignment_criteria")
### Iterate through batches
for (gcms_batch in gcms_batch_list) {
# Define the alignment criteria for the corresponding batch
partial_alignment_threshold <-
alignment_criteria["partial_alignment_threshold"][
alignment_criteria[
"gcms_batch_list"] == gcms_batch]
linear_shift_criteria <-
alignment_criteria["linear_shift_criteria"][
alignment_criteria[
"gcms_batch_list"] == gcms_batch]
row_merging_threshold <-
alignment_criteria["row_merging_threshold"][
alignment_criteria[
"gcms_batch_list"] == gcms_batch]
# The script performs the alignment(s) of the GCMS data of the batch
source(here("Scripts", "align_MS-data.R"))
gc()
}; beep()
Sys.sleep(1)
## Correct alignment ----
### Iterate through GCMS batches
for (gcms_batch in gcms_batch_list) {
# Script to correct alignments
source(here("Scripts", "correct-GCalignment.R"))
gc()
}; beep()
Sys.sleep(1)
## Generate the group tables ----
### Iterate through GCMS batches
for (gcms_batch in gcms_batch_list) {
# Script to prepare the group alignments for assembling the master table
source(here("Scripts", "group-tables.R"))
gc()
}; beep()
Sys.sleep(1)
## Build the master table ----
# Script to assemble the final CHC composition data set (master table)
source(here("Scripts", "mastertable.R"))
gc()
beep()
Sys.sleep(1)
## Univariate CHC stats ----
# Script to perform analyses on the abundance of hydrocarbon classes and mean
# chain length
source(here("scripts", "univariate_chcs.R")); beep()
Sys.sleep(1)
# Correlation of gene expression and CHC composition ----
# Script to make the correlograms for the CHC composition vs gene expression
source(here("scripts", "correlograms.R")); beep()
Sys.sleep(1)
# NMDS ----
# Script to perform the Non-metric multidimensional Scaling (NMDS)
source(here("scripts", "nmds.R")); beep()
Sys.sleep(1)
# END ----
## Report session information
capture.output(sessionInfo()
, file = here("output"
,"SInf_analysis-master-script.txt"))
beep(8)