From 217007e2070e75529e2563372a629f7c3740a59a Mon Sep 17 00:00:00 2001 From: JulienPeloton Date: Tue, 30 Jan 2024 13:31:52 +0100 Subject: [PATCH 1/4] Add a new CDS xmatch call for the SPICY catalog. --- fink_broker/science.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fink_broker/science.py b/fink_broker/science.py index dd3d9fab..74f0bb0a 100644 --- a/fink_broker/science.py +++ b/fink_broker/science.py @@ -245,6 +245,18 @@ def apply_science_modules(df: DataFrame, noscience: bool = False) -> DataFrame: # see https://github.com/astrolabsoftware/fink-broker/issues/787 df = df.withColumnRenamed('Type', 'vsx') + _LOG.info("New processor: SPICY (1.2 arcsec)") + df = xmatch_cds( + df, + catalogname="vizier:J/ApJS/254/33/table1", + distmaxarcsec=1.2, + cols_out=['SPICY', 'class'], + types=['int', 'string'] + ) + # rename `SPICY` into `spicy_id` and `class` into `spicy_class` + df = df.withColumnRenamed('SPICY', 'spicy_id') + df = df.withColumnRenamed('class', 'spicy_class') + _LOG.info("New processor: GCVS (1.5 arcsec)") df = df.withColumn( 'gcvs', From 3272c1ef40e769a4e4a31ac32413dff4254f699e Mon Sep 17 00:00:00 2001 From: JulienPeloton Date: Tue, 30 Jan 2024 16:36:31 +0100 Subject: [PATCH 2/4] Cast values to avoid conversion problem between Spark/Python/HBase. --- fink_broker/hbaseUtils.py | 2 ++ fink_broker/science.py | 21 ++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/fink_broker/hbaseUtils.py b/fink_broker/hbaseUtils.py index 6256d039..ea1c2d7c 100644 --- a/fink_broker/hbaseUtils.py +++ b/fink_broker/hbaseUtils.py @@ -77,6 +77,8 @@ def load_fink_cols(): 'upper_rate': {'type': 'double', 'default': 0.0}, 'delta_time': {'type': 'double', 'default': 0.0}, 'from_upper': {'type': 'boolean', 'default': False}, + 'spicy_id': {'type': 'int', 'default': -1}, + 'spicy_name': {'type': 'string', 'default': 'Unknown'}, } fink_nested_cols = {} diff --git a/fink_broker/science.py b/fink_broker/science.py index 74f0bb0a..7a532587 100644 --- a/fink_broker/science.py +++ b/fink_broker/science.py @@ -253,9 +253,28 @@ def apply_science_modules(df: DataFrame, noscience: bool = False) -> DataFrame: cols_out=['SPICY', 'class'], types=['int', 'string'] ) - # rename `SPICY` into `spicy_id` and `class` into `spicy_class` + # rename `SPICY` into `spicy_id`. Values are number or null df = df.withColumnRenamed('SPICY', 'spicy_id') + # Cast null into -1 + df = df.withColumn( + 'SPICY', + F.when( + df['SPICY'].isNull(), + F.lit(-1) + ).otherwise(df['SPICY']) + ) + + # rename `class` into `spicy_class`. Values are: + # Unknown, FS, ClassI, ClassII, ClassIII, or 'nan' df = df.withColumnRenamed('class', 'spicy_class') + # Make 'nan' 'Unknown' + df = df.withColumn( + 'spicy_class', + F.when( + df['spicy_class'] == 'nan', + F.lit('Unknown') + ).otherwise(df['spicy_class']) + ) _LOG.info("New processor: GCVS (1.5 arcsec)") df = df.withColumn( From f2daef5bcf1b98d1c5926448741f819e46122c6e Mon Sep 17 00:00:00 2001 From: JulienPeloton Date: Wed, 31 Jan 2024 11:43:09 +0100 Subject: [PATCH 3/4] Update test to add the 2 new columns --- fink_broker/hbaseUtils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fink_broker/hbaseUtils.py b/fink_broker/hbaseUtils.py index ea1c2d7c..4bf27473 100644 --- a/fink_broker/hbaseUtils.py +++ b/fink_broker/hbaseUtils.py @@ -44,7 +44,7 @@ def load_fink_cols(): -------- >>> fink_cols, fink_nested_cols = load_fink_cols() >>> print(len(fink_cols)) - 27 + 29 >>> print(len(fink_nested_cols)) 18 @@ -109,7 +109,7 @@ def load_all_cols(): >>> root_level, candidates, images, fink_cols, fink_nested_cols = load_all_cols() >>> out = {**root_level, **candidates, **images, **fink_cols, **fink_nested_cols} >>> print(len(out)) - 156 + 158 """ fink_cols, fink_nested_cols = load_fink_cols() @@ -320,7 +320,7 @@ def load_ztf_index_cols(): -------- >>> out = load_ztf_index_cols() >>> print(len(out)) - 81 + 83 """ # From `root` or `candidates.` common = [ From f29e16dca266151f038decb5b147cc90a03bdb40 Mon Sep 17 00:00:00 2001 From: JulienPeloton Date: Wed, 31 Jan 2024 13:41:29 +0100 Subject: [PATCH 4/4] Fix typo in column name --- fink_broker/science.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fink_broker/science.py b/fink_broker/science.py index 7a532587..e81ca292 100644 --- a/fink_broker/science.py +++ b/fink_broker/science.py @@ -257,11 +257,11 @@ def apply_science_modules(df: DataFrame, noscience: bool = False) -> DataFrame: df = df.withColumnRenamed('SPICY', 'spicy_id') # Cast null into -1 df = df.withColumn( - 'SPICY', + 'spicy_id', F.when( - df['SPICY'].isNull(), + df['spicy_id'].isNull(), F.lit(-1) - ).otherwise(df['SPICY']) + ).otherwise(df['spicy_id']) ) # rename `class` into `spicy_class`. Values are: