diff --git a/impc_etl/jobs/load/solr/stats_results_mapper.py b/impc_etl/jobs/load/solr/stats_results_mapper.py index b1d325c78..937e73c9b 100644 --- a/impc_etl/jobs/load/solr/stats_results_mapper.py +++ b/impc_etl/jobs/load/solr/stats_results_mapper.py @@ -173,7 +173,8 @@ def process_manual_phenotype_calls(): for col_name in reference_columns: if col_name not in viability_stats.columns: viability_stats = viability_stats.withColumn(col_name, f.lit(None)) - viability_stats = viability_stats.select(reference_columns) + viability_columns = reference_columns + ["category"] + viability_stats = viability_stats.select(viability_columns) # Process gross pathology stats gross_pathology_stats = gross_pathology_stats_results( @@ -1205,11 +1206,12 @@ def generate_final_stats_results(): col_name for col_name in WINDOW_COLUMNS if col_name != "observations_window_weight" - ] + ] + ["category"] stats_results_df = open_stats_df.select(*stats_results_column_list) stats_results_df = stats_results_df.repartition(10000) else: - stats_results_df = open_stats_df.select(*STATS_RESULTS_COLUMNS) + stats_results_column_list = STATS_RESULTS_COLUMNS + ["category"] + stats_results_df = open_stats_df.select(*stats_results_column_list) # Write main results