OHDSI · jreps · Sep 9, 2025 · Nov 5, 2024 · Nov 5, 2024 · Mar 15, 2025
diff --git a/.github/workflows/R_CMD_check_Hades.yaml b/.github/workflows/R_CMD_check_Hades.yaml
@@ -22,7 +22,7 @@ jobs:
         config:
           - {os: windows-latest, r: 'release'}
           - {os: macOS-latest, r: 'release'}
-          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          - {os: ubuntu-22.04, r: 'release', rtools: ''}
 
     env:
       GITHUB_PAT: ${{ secrets.GH_TOKEN }}
@@ -77,12 +77,15 @@ jobs:
       - name: Install system requirements
         if: runner.os == 'Linux'
         run: |
+          sudo apt-get install -y make
+          sudo apt-get install -y libcurl4-openssl-dev
           sudo apt-get install -y libssh-dev
+          sudo apt-get install -y libssl-dev
           Rscript -e 'install.packages("remotes")'
           while read -r cmd
           do
             eval sudo $cmd
-          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
+          done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "22.04"))')
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: Characterization
 Type: Package
 Title: Implement Descriptive Studies Using the Common Data Model
-Version: 2.1.3
-Date: 2025-2-26
+Version: 2.2.0
+Date: 2025-8-28
 Authors@R: c(
   person("Jenna", "Reps", , "jreps@its.jnj.com", role = c("aut", "cre")),
   person("Patrick", "Ryan", , "ryan@ohdsi.org", role = c("aut")),
@@ -16,9 +16,9 @@ BugReports: https://github.com/OHDSI/Characterization/issues
 Depends:
   R (>= 4.0.0)
 Imports:
-  Andromeda,
+  Andromeda (>= 1.0.0),
   DatabaseConnector (>= 6.3.1),
-  FeatureExtraction  (>= 3.6.0),
+  FeatureExtraction  (>= 3.10.0),
   SqlRender (>= 1.9.0),
   ParallelLogger (>= 3.0.0),
   ResultModelManager,
@@ -28,6 +28,7 @@ Imports:
   rlang
 Suggests:
   devtools,
+  formatR,
   testthat,
   kableExtra,
   knitr,

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,15 @@
+Characterization 2.2.0
+======================
+- fixed csv spec: made mean_exposure_time a float and specified that min_characterization_mean in the covariate table must be non-null and is part of the primary key.
+- changed Line 284 in AggregateCovariates.R to cast exposure_time summary values to bigint due to integer overflow in some dbms.
+- added dummy sql code to prevent warnings about missing variables
+- added code to save empty csv files when there are no rows, as this makes it easier to distinguish "no results" from an error while saving.
+- removed the progress bar from the custom during covariate extraction
+- added option includedFiles in insertResultsToDatabase() where you can specify the csv files to upload to prevent warnings of missing csv files.
+- made sure all connections are disconnected after use
+- fixed counts to use count_big (thanks Anthony Sena) to fix an issue where a count was larger than the maximum integer value.
+- added code to copy csv files in batches; this is needed when the csv files are very large.
+
 Characterization 2.1.3
 ======================
 - prepared for CRAN by adding examples, removing getwd(), replacing T/F with TRUE/FALSE and added example data inside package so no download required.

diff --git a/R/AggregateCovariates.R b/R/AggregateCovariates.R
@@ -202,6 +202,7 @@ computeTargetAggregateCovariateAnalyses <- function(
     outputFolder,
     minCharacterizationMean = 0,
     minCellCount = 0,
+    progressBar = interactive(),
     ...) {
 
   if(missing(outputFolder)){
@@ -251,7 +252,7 @@ computeTargetAggregateCovariateAnalyses <- function(
     tempTable = TRUE,
     dropTableIfExists = TRUE,
     createTable = TRUE,
-    progressBar = FALSE,
+    progressBar = progressBar,
     tempEmulationSchema = tempEmulationSchema
   )
 
@@ -273,7 +274,7 @@ computeTargetAggregateCovariateAnalyses <- function(
   DatabaseConnector::executeSql(
     connection = connection,
     sql = sql,
-    progressBar = FALSE,
+    progressBar = progressBar,
     reportOverallTime = FALSE
   )
   completionTime <- Sys.time() - start
@@ -283,11 +284,11 @@ computeTargetAggregateCovariateAnalyses <- function(
   message("Extracting target cohort counts")
   sql <- "select
   cohort_definition_id,
-  count(*) row_count,
-  count(distinct subject_id) person_count,
-  min(datediff(day, cohort_start_date, cohort_end_date)) min_exposure_time,
-  avg(datediff(day, cohort_start_date, cohort_end_date)) mean_exposure_time,
-  max(datediff(day, cohort_start_date, cohort_end_date)) max_exposure_time
+  count_big(*) row_count,
+  count_big(distinct subject_id) person_count,
+  min(cast(datediff(day, cohort_start_date, cohort_end_date) as bigint)) min_exposure_time,
+  avg(cast(datediff(day, cohort_start_date, cohort_end_date) as bigint)) mean_exposure_time,
+  max(cast(datediff(day, cohort_start_date, cohort_end_date) as bigint)) max_exposure_time
   from
   (select * from #agg_cohorts_before union select * from #agg_cohorts_extras) temp
   group by cohort_definition_id;"
@@ -299,7 +300,7 @@ computeTargetAggregateCovariateAnalyses <- function(
   counts <- DatabaseConnector::querySql(
     connection = connection,
     sql = sql,
-    snakeCaseToCamelCase = TRUE,
+    snakeCaseToCamelCase = TRUE
   )
 
   message("Target Aggregate: Computing aggregate target covariate results")
@@ -327,7 +328,8 @@ computeTargetAggregateCovariateAnalyses <- function(
   )
   DatabaseConnector::executeSql(
     connection = connection,
-    sql = sql, progressBar = FALSE,
+    sql = sql,
+    progressBar = progressBar,
     reportOverallTime = FALSE
   )
 
@@ -363,6 +365,7 @@ computeCaseAggregateCovariateAnalyses <- function(
     outputFolder,
     minCharacterizationMean = 0,
     minCellCount = 0,
+    progressBar = interactive(),
     ...) {
 
   if(missing(outputFolder)){
@@ -443,7 +446,7 @@ computeCaseAggregateCovariateAnalyses <- function(
     tempTable = TRUE,
     dropTableIfExists = TRUE,
     createTable = TRUE,
-    progressBar = FALSE,
+    progressBar = progressBar,
     tempEmulationSchema = tempEmulationSchema
   )
 
@@ -469,7 +472,7 @@ computeCaseAggregateCovariateAnalyses <- function(
   DatabaseConnector::executeSql(
     connection = connection,
     sql = sql,
-    progressBar = FALSE,
+    progressBar = progressBar,
     reportOverallTime = FALSE
   )
 
@@ -495,7 +498,7 @@ computeCaseAggregateCovariateAnalyses <- function(
     DatabaseConnector::executeSql(
       connection = connection,
       sql = sql,
-      progressBar = FALSE,
+      progressBar = progressBar,
       reportOverallTime = FALSE
     )
   }
@@ -522,7 +525,7 @@ computeCaseAggregateCovariateAnalyses <- function(
   counts <- DatabaseConnector::querySql(
     connection = connection,
     sql = sql,
-    snakeCaseToCamelCase = TRUE,
+    snakeCaseToCamelCase = TRUE
   )
 
   message("Case Aggregates: Computing aggregate before case covariate results")
@@ -576,7 +579,8 @@ computeCaseAggregateCovariateAnalyses <- function(
   )
   DatabaseConnector::executeSql(
     connection = connection,
-    sql = sql, progressBar = FALSE,
+    sql = sql,
+    progressBar = progressBar,
     reportOverallTime = FALSE
   )
 

diff --git a/R/CustomCovariates.R b/R/CustomCovariates.R
@@ -147,6 +147,8 @@ createDuringCovariateSettings <- function(
 #'   cohortTable = 'cohort'
 #' )
 #'
+#' DatabaseConnector::disconnect(connection)
+#'
 #' @return
 #' A 'FeatureExtraction' covariateData object containing the during covariates based on user settings
 #'
@@ -171,6 +173,8 @@ getDbDuringCovariateData <- function(
 
   getDomainSettings <- utils::read.csv(system.file("csv/PrespecAnalyses.csv", package = "Characterization"))
 
+  # not showing progress
+  progressBar <- FALSE
 
   # create Tables
   sql <- "DROP TABLE IF EXISTS #cov_ref;
@@ -187,7 +191,7 @@ getDbDuringCovariateData <- function(
     targetDialect = DatabaseConnector::dbms(connection),
     tempEmulationSchema = tempEmulationSchema
   )
-  DatabaseConnector::executeSql(connection, sql = sql)
+  DatabaseConnector::executeSql(connection, sql = sql, progressBar = progressBar)
 
   sql <- "DROP TABLE IF EXISTS #analysis_ref;
   CREATE TABLE #analysis_ref(
@@ -204,7 +208,7 @@ getDbDuringCovariateData <- function(
     targetDialect = DatabaseConnector::dbms(connection),
     tempEmulationSchema = tempEmulationSchema
   )
-  DatabaseConnector::executeSql(connection, sql)
+  DatabaseConnector::executeSql(connection, sql, progressBar = progressBar)
 
   # included covariates
   includedCovTable <- ""
@@ -219,7 +223,8 @@ getDbDuringCovariateData <- function(
       tempTable = TRUE,
       data = data.frame(id = covariateSettings$includedCovariateIds),
       camelCaseToSnakeCase = TRUE,
-      tempEmulationSchema = tempEmulationSchema
+      tempEmulationSchema = tempEmulationSchema,
+      progressBar = progressBar
     )
   }
 
@@ -235,7 +240,8 @@ getDbDuringCovariateData <- function(
       tempTable = TRUE,
       data = data.frame(id = covariateSettings$includedCovariateConceptIds),
       camelCaseToSnakeCase = TRUE,
-      tempEmulationSchema = tempEmulationSchema
+      tempEmulationSchema = tempEmulationSchema,
+      progressBar = progressBar
     )
 
     if (covariateSettings$addDescendantsToInclude) {
@@ -262,7 +268,8 @@ getDbDuringCovariateData <- function(
       tempTable = TRUE,
       data = data.frame(id = covariateSettings$excludedCovariateConceptIds),
       camelCaseToSnakeCase = TRUE,
-      tempEmulationSchema = tempEmulationSchema
+      tempEmulationSchema = tempEmulationSchema,
+      progressBar = progressBar
     )
 
     if (covariateSettings$addDescendantsToInclude) {
@@ -325,7 +332,7 @@ getDbDuringCovariateData <- function(
     DatabaseConnector::executeSql(
       connection = connection,
       sql = sql,
-      progressBar = TRUE
+      progressBar = progressBar
     )
     time <- Sys.time() - start
     message(paste0("Execution took ", round(time, digits = 2), " ", units(time)))

diff --git a/R/Database.R b/R/Database.R
@@ -70,6 +70,7 @@ createSqliteDatabase <- function(
 #' @param resultsFolder        The folder containing the csv results
 #' @param tablePrefix          A prefix to append to the result tables for the characterization results
 #' @param csvTablePrefix      The prefix added to the csv results - default is 'c_'
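+#' @param includedFiles      The result table names (as listed in the results data model specification, without the csv prefix, e.g., 'time_to_event') whose csv files should be uploaded, or NULL to upload all tables in the specification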
 #' @family Database
 #' @return
 #' Returns the connection to the sqlite database
@@ -79,13 +80,13 @@ createSqliteDatabase <- function(
 #' # generate results into resultsFolder
 #' conDet <- exampleOmopConnectionDetails()
 #'
-#' drSet <- createDechallengeRechallengeSettings(
-#'   targetIds = c(1,2),
+#' tteSet <- createTimeToEventSettings(
+#' targetIds = c(1,2),
 #'   outcomeIds = 3
-#' )
+#'   )
 #'
 #' cSet <- createCharacterizationSettings(
-#'   dechallengeRechallengeSettings = drSet
+#'   timeToEventSettings = tteSet
 #' )
 #'
 #' runCharacterizationAnalyses(
@@ -96,7 +97,7 @@ createSqliteDatabase <- function(
 #'   outcomeTable = 'cohort',
 #'   cdmDatabaseSchema = 'main',
 #'   characterizationSettings = cSet,
-#'   outputDirectory = tempdir()
+#'   outputDirectory = file.path(tempdir(),'database')
 #' )
 #'
 #' # create sqlite database
@@ -112,7 +113,8 @@ createSqliteDatabase <- function(
 #' insertResultsToDatabase(
 #'  connectionDetails = charResultDbCD,
 #'  schema = 'main',
-#'  resultsFolder = tempdir()
+#'  resultsFolder = file.path(tempdir(),'database'),
+#'  includedFiles = c('time_to_event')
 #' )
 #'
 #'
@@ -122,11 +124,16 @@ insertResultsToDatabase <- function(
     schema,
     resultsFolder,
     tablePrefix = "",
-    csvTablePrefix = "c_") {
+    csvTablePrefix = "c_",
+    includedFiles = NULL
+    ) {
   specLoc <- system.file("settings", "resultsDataModelSpecification.csv",
     package = "Characterization"
   )
   specs <- utils::read.csv(specLoc)
+  if(!is.null(includedFiles)){
+    specs <- specs[specs$table_name %in% includedFiles,]
+  }
   colnames(specs) <- SqlRender::snakeCaseToCamelCase(colnames(specs))
   specs$tableName <- paste0(csvTablePrefix, specs$tableName)
   ResultModelManager::uploadResults(
@@ -253,7 +260,8 @@ createCharacterizationTables <- function(
         )
         DatabaseConnector::executeSql(
           connection = conn,
-          sql = sql
+          sql = sql,
+          progressBar = FALSE
         )
 
         sql <- "DROP TABLE @my_schema.@table"
@@ -269,7 +277,8 @@ createCharacterizationTables <- function(
         )
         DatabaseConnector::executeSql(
           connection = conn,
-          sql = sql
+          sql = sql,
+          progressBar = FALSE
         )
       }
     }
@@ -288,10 +297,11 @@ createCharacterizationTables <- function(
 
     DatabaseConnector::executeSql(
       connection = conn,
-      sql = renderedSql
+      sql = renderedSql,
+      progressBar = FALSE
     )
 
-    # add database migration here in the future
+    ## add database migration here in the future
     migrateDataModel(
       connectionDetails = connectionDetails,
       connection = conn,
@@ -329,7 +339,11 @@ migrateDataModel <- function(
     connection <- DatabaseConnector::connect(connectionDetails = connectionDetails)
     on.exit(DatabaseConnector::disconnect(connection))
   }
-  DatabaseConnector::executeSql(connection, updateVersionSql)
+  DatabaseConnector::executeSql(
+    connection = connection,
+    sql = updateVersionSql,
+    progressBar = FALSE
+    )
 }