From bd144e7de585961e09a40c60a675da062c08e224 Mon Sep 17 00:00:00 2001 From: pieterlukasse Date: Wed, 29 Jan 2025 21:11:12 +0100 Subject: [PATCH] feat: cleanup / review todos --- models/cohortdata.go | 2 +- models/helper.go | 26 +++++++++++++------------- tests/models_tests/models_test.go | 18 +++++++++--------- utils/parsing.go | 4 ++-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/models/cohortdata.go b/models/cohortdata.go index 60ba6a0..e5c9ff7 100644 --- a/models/cohortdata.go +++ b/models/cohortdata.go @@ -104,7 +104,7 @@ func (h CohortData) RetrieveHistogramDataBySourceIdAndCohortIdAndConceptDefsPlus omopDataSource := dataSourceModel.GetDataSource(sourceId, Omop) resultsDataSource := dataSourceModel.GetDataSource(sourceId, Results) finalSetAlias := "final_set_alias" - histogramConcept, err := utils.GetLastCustomConceptVariableDef(filterConceptDefsAndCohortPairs) + histogramConcept, err := utils.CheckAndGetLastCustomConceptVariableDef(filterConceptDefsAndCohortPairs) if err != nil { log.Fatalf("failed: %v", err) return nil, err diff --git a/models/helper.go b/models/helper.go index f5de764..de30ae1 100644 --- a/models/helper.go +++ b/models/helper.go @@ -70,24 +70,24 @@ func QueryFilterByConceptDefHelper(query *gorm.DB, sourceId int, filterConceptDe simpleFilterConceptDef := utils.CustomConceptVariableDef{ConceptId: filterConceptDef.ConceptId} query.Select(fmt.Sprintf("%s.person_id, %s.observation_concept_id, %s.value_as_number ", observationTableAlias+"_a", observationTableAlias+"_a", observationTableAlias+"_a")) - query := QueryFilterByConceptDefHelper2(query, sourceId, simpleFilterConceptDef, - omopDataSource, "", personIdFieldForObservationJoin, "observation_continuous", observationTableAlias+"_a") + query := queryJoinAndFilterByConceptDef(query, sourceId, simpleFilterConceptDef, + omopDataSource, personIdFieldForObservationJoin, "observation_continuous", observationTableAlias+"_a") tmpTransformedTable, err := TransformDataIntoTempTable(omopDataSource, query, filterConceptDef) - // TODO - the resulting query should actually be Select * from temptable.... as this collapses all underlying queries. TODO2 - ensure the transform method also filters.... - query = QueryFilterByConceptDefHelper2(query, sourceId, filterConceptDef, //TODO - turn around - omopDataSource, "", personIdFieldForObservationJoin, tmpTransformedTable, observationTableAlias+"_b") + // TODO - the resulting query should actually be Select * from temptable.... as this collapses all underlying queries. + query = queryJoinAndFilterByConceptDef(query, sourceId, filterConceptDef, + omopDataSource, personIdFieldForObservationJoin, tmpTransformedTable, observationTableAlias+"_b") return query, observationTableAlias + "_b", err } else { // simple filterConceptDef with no transformation - query := QueryFilterByConceptDefHelper2(query, sourceId, filterConceptDef, - omopDataSource, "", personIdFieldForObservationJoin, "observation_continuous", observationTableAlias) + query := queryJoinAndFilterByConceptDef(query, sourceId, filterConceptDef, + omopDataSource, personIdFieldForObservationJoin, "observation_continuous", observationTableAlias) return query, "", nil } } -func QueryFilterByConceptDefHelper2(query *gorm.DB, sourceId int, filterConceptDef utils.CustomConceptVariableDef, - omopDataSource *utils.DbAndSchema, resultSchemaName string, personIdFieldForObservationJoin string, observationDataSource string, observationTableAlias string) *gorm.DB { +func queryJoinAndFilterByConceptDef(query *gorm.DB, sourceId int, filterConceptDef utils.CustomConceptVariableDef, + omopDataSource *utils.DbAndSchema, personIdFieldForObservationJoin string, observationDataSource string, observationTableAlias string) *gorm.DB { log.Printf("Adding extra INNER JOIN with alias %s", observationTableAlias) aliasedObservationDataSource := omopDataSource.Schema + "." + observationDataSource + " as " + observationTableAlias + omopDataSource.GetViewDirective() // for temp table, the alias is slightly different: @@ -131,7 +131,7 @@ func TransformDataIntoTempTable(omopDataSource *utils.DbAndSchema, query *gorm.D // Generate a unique hash key based on the query and transformation querySQL := utils.ToSQL(query) queryKey := fmt.Sprintf("%s|%s", querySQL, filterConceptDef.Transformation) - cacheKey := utils.GenerateHash(queryKey) // TODO - review + cacheKey := utils.GenerateHash(queryKey) // Check if the temporary table already exists in the cache if cachedTableName, exists := utils.TempTableCache.Get(cacheKey); exists { @@ -192,7 +192,7 @@ func CreateAndFillTempTable(omopDataSource *utils.DbAndSchema, query *gorm.DB, t case "box_cox": // Placeholder: implement Box-Cox transformation logic as per requirements log.Printf("box_cox transformation logic needs to be implemented") - return "" + panic("error") default: log.Fatalf("Unsupported transformation type: %s", filterConceptDef.Transformation) panic("error") @@ -212,7 +212,7 @@ func TempTableSQLAndFinalName(omopDataSource *utils.DbAndSchema, tempTableName s tempTableName, selectStatement, fromSQL, extraWhereSQL, ) if omopDataSource.Vendor == "sqlserver" { - finalTempTableName = "##" + tempTableName // Global temp table for MSSQL + finalTempTableName = "##" + tempTableName tempTableSQL = fmt.Sprintf( "SELECT %s INTO %s FROM (%s) T WHERE %s", selectStatement, finalTempTableName, fromSQL, extraWhereSQL, @@ -231,6 +231,7 @@ func QueryFilterByConceptDefsHelper(query *gorm.DB, sourceId int, filterConceptD return query } +// DEPRECATED - use QueryFilterByConceptDefsPlusCohortPairsHelper instead // Helper function that adds extra filter clauses to the query, for the given filterCohortPairs, intersecting on the // right set of tables, excluding data where necessary, etc. // It basically iterates over the list of filterCohortPairs, adding relevant INTERSECT and EXCEPT clauses, so that the resulting set is the @@ -247,7 +248,6 @@ func QueryFilterByCohortPairsHelper(filterCohortPairs []utils.CustomDichotomousV return query } -// TODO - remove code duplication above func QueryFilterByCohortPairHelper(query *gorm.DB, filterCohortPair utils.CustomDichotomousVariableDef, resultsDataSource *utils.DbAndSchema, cohortDefinitionId int, personIdFieldForObservationJoin string, unionAndIntersectSQLAlias string) *gorm.DB { unionAndIntersectSQL := "(" + "SELECT subject_id FROM " + resultsDataSource.Schema + ".cohort WHERE cohort_definition_id=? " diff --git a/tests/models_tests/models_test.go b/tests/models_tests/models_test.go index c5f2a0e..f47350f 100644 --- a/tests/models_tests/models_test.go +++ b/tests/models_tests/models_test.go @@ -258,7 +258,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { var subjectIds []*SubjectId population := largestCohort query := models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // ...so we expect overlap the size of the largestCohort: if len(subjectIds) != largestCohort.CohortSize { @@ -280,7 +280,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = largestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size of the largestCohort-6 (where 6 is the size of the overlap between extendedCopyOfSecondLargestCohort and largestCohort): if len(subjectIds) != (largestCohort.CohortSize - 6) { @@ -302,7 +302,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = largestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect same as previous test above: if len(subjectIds) != (largestCohort.CohortSize - 6) { @@ -320,7 +320,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = extendedCopyOfSecondLargestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size of the extendedCopyOfSecondLargestCohort.CohortSize - secondLargestCohort.CohortSize: if len(subjectIds) != (extendedCopyOfSecondLargestCohort.CohortSize - secondLargestCohort.CohortSize) { @@ -342,7 +342,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = extendedCopyOfSecondLargestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size to be 0, since all items remaining from first pair happen to overlap with largestCohort and are therefore excluded (pair overlap is excluded): if len(subjectIds) != 0 { @@ -354,7 +354,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = largestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size to be 0 as explained in comment above: if len(subjectIds) != 0 { @@ -367,7 +367,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = largestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size to be the size of the cohort, since there are no filtering pairs: if len(subjectIds) != largestCohort.CohortSize { @@ -385,7 +385,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = largestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size to be 0, since the pair is composed of the same cohort in CohortDefinitionId1 and CohortDefinitionId2 and their overlap is excluded: if len(subjectIds) != 0 { @@ -403,7 +403,7 @@ func TestQueryFilterByCohortPairsHelper(t *testing.T) { population = smallestCohort resultsDataSource = tests.GetResultsDataSource() query = models.QueryFilterByCohortPairsHelper(filterCohortPairs, resultsDataSource, population.Id, "unionAndIntersect"). - Select("*") + Select("unionAndIntersect.subject_id") _ = query.Scan(&subjectIds) // in this case we expect overlap the size to be 0, since the cohorts in the pair do not overlap with the population: if len(subjectIds) != 0 { diff --git a/utils/parsing.go b/utils/parsing.go index da79922..0e44092 100644 --- a/utils/parsing.go +++ b/utils/parsing.go @@ -480,8 +480,8 @@ func GenerateSynchronizedTimestampID() string { return fmt.Sprintf("%x", time.Now().UnixNano()) } -// GetLastCustomConceptVariableDef checks if the last item is of type CustomConceptVariableDef and returns it -func GetLastCustomConceptVariableDef(filterConceptDefsAndCohortPairs []interface{}) (*CustomConceptVariableDef, error) { +// checks if the last item is of type CustomConceptVariableDef and returns it +func CheckAndGetLastCustomConceptVariableDef(filterConceptDefsAndCohortPairs []interface{}) (*CustomConceptVariableDef, error) { if len(filterConceptDefsAndCohortPairs) == 0 { return nil, fmt.Errorf("the slice is empty") }