-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExecuteAnalyses.R
More file actions
86 lines (73 loc) · 3.43 KB
/
ExecuteAnalyses.R
File metadata and controls
86 lines (73 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# -------------------------------------------------------
# PLEASE READ
# -------------------------------------------------------
#
# You must call "renv::restore()" and follow the prompts
# to install all of the necessary R libraries to run this
# project. This is a one-time operation that you must do
# before running any code.
#
# !!! PLEASE RESTART R AFTER RUNNING renv::restore() !!!
#
# -------------------------------------------------------
# Installs the R libraries this project needs (see the header above).
# NOTE(review): this call is live, so it runs on every pass through the
# script, although the header describes it as a one-time step that should be
# followed by an R restart.
renv::restore()

# ENVIRONMENT SETTINGS NEEDED FOR RUNNING STUDY ------------
Sys.setenv(
  # Sets the Java maximum heap space to 4GB
  "_JAVA_OPTIONS" = "-Xmx4g",
  # Sets the number of threads to 1 to avoid deadlocks on file system
  "VROOM_THREADS" = 1
)
##=========== START OF INPUTS ==========

# Schema used to emulate temp tables on database platforms that don't
# support them
options(sqlRenderTempEmulationSchema = "scratch.scratch_mschuemi")

# The database / schema where the data in CDM format live
cdmDatabaseSchema <- "merative_ccae.cdm_merative_ccae_v3046"

# A database / schema where study tables can be written
workDatabaseSchema <- "scratch.scratch_mschuemi"

# Name of the table where the cohorts will be written
cohortTableName <- "example_strategus_study_ccae"

# Where the output files will be written
resultsFolder <- "e:/exampleStrategusStudy/results"

# Where the intermediate work files will be written
workFolder <- "e:/exampleStrategusStudy/strategusInternals"

# Only used as a folder name for results from the study
databaseName <- "CCAE"

# Minimum cell count for inclusion in output tables
minCellCount <- 5
# Connection details for your CDM database. The connection string and the
# token are read with keyring::key_get() rather than being written into this
# script. The available arguments are described here:
# https://ohdsi.github.io/DatabaseConnector/reference/createConnectionDetails.html
connectionDetails <- DatabaseConnector::createConnectionDetails(
  dbms = "spark",
  user = "token",
  password = keyring::key_get("databricksToken"),
  connectionString = keyring::key_get("databricksConnectionString")
)

# To test your connection, uncomment and run these two lines:
#conn <- DatabaseConnector::connect(connectionDetails)
#DatabaseConnector::disconnect(conn)

##=========== END OF INPUTS ==========
##################################
# DO NOT MODIFY BELOW THIS POINT
##################################

# Project configuration; its projectRootFolder and studySpecificationFileName
# entries are used below to locate the study specification JSON.
config <- config::get()

# Nest the work and results folders under a per-database subfolder.
# NOTE(review): both variables are overwritten in place, so re-running this
# section without re-running the inputs appends databaseName a second time.
workFolder <- file.path(workFolder, databaseName)
resultsFolder <- file.path(resultsFolder, databaseName)

# Load the analysis specifications from <projectRootFolder>/inst/.
analysisSpecifications <- ParallelLogger::loadSettingsFromJson(
  fileName = file.path(
    config$projectRootFolder,
    "inst",
    config$studySpecificationFileName
  )
)
# Execution settings built from the inputs above: the CDM and work schemas,
# the cohort table names derived from cohortTableName, the per-database
# folders, and the minimum cell count.
executionSettings <- Strategus::createCdmExecutionSettings(
  cdmDatabaseSchema = cdmDatabaseSchema,
  workDatabaseSchema = workDatabaseSchema,
  cohortTableNames = CohortGenerator::getCohortTableNames(
    cohortTable = cohortTableName
  ),
  resultsFolder = resultsFolder,
  workFolder = workFolder,
  minCellCount = minCellCount
  # IF YOU NEED TO RE-RUN A STUDY BUT ONLY WANT TO RUN SPECIFIC MODULES,
  # ADD A COMMA AFTER minCellCount AND USE modulesToExecute:
  # modulesToExecute = c("CohortGeneratorModule", "SelfControlledCaseSeriesModule")
)
# Make sure the results and work folders exist before anything is written
# to them. recursive = TRUE also creates any missing parent folders. The
# reserved word TRUE is used rather than T, because T is an ordinary
# variable that can be reassigned elsewhere in the session.
if (!dir.exists(resultsFolder)) {
  dir.create(resultsFolder, recursive = TRUE)
}
if (!dir.exists(workFolder)) {
  dir.create(workFolder, recursive = TRUE)
}
# Write the execution settings to executionSettings.json in the results
# folder.
ParallelLogger::saveSettingsToJson(
  fileName = file.path(resultsFolder, "executionSettings.json"),
  object = executionSettings
)

# Run the study with the loaded specifications, the execution settings and
# the database connection details.
Strategus::execute(
  connectionDetails = connectionDetails,
  executionSettings = executionSettings,
  analysisSpecifications = analysisSpecifications
)