From 85858315489ca167670046f03b08bf4e7d16e032 Mon Sep 17 00:00:00 2001 From: Julien Date: Thu, 1 Feb 2024 08:03:05 +0100 Subject: [PATCH] Add crossmatch with the SPICY catalog (#803) * Add a new CDS xmatch call for the SPICY catalog. * Cast values to avoid conversion problem between Spark/Python/HBase. * Update test to add the 2 new columns * Fix typo in column name --- fink_broker/hbaseUtils.py | 8 +++++--- fink_broker/science.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/fink_broker/hbaseUtils.py b/fink_broker/hbaseUtils.py index 6256d039..4bf27473 100644 --- a/fink_broker/hbaseUtils.py +++ b/fink_broker/hbaseUtils.py @@ -44,7 +44,7 @@ def load_fink_cols(): -------- >>> fink_cols, fink_nested_cols = load_fink_cols() >>> print(len(fink_cols)) - 27 + 29 >>> print(len(fink_nested_cols)) 18 @@ -77,6 +77,8 @@ def load_fink_cols(): 'upper_rate': {'type': 'double', 'default': 0.0}, 'delta_time': {'type': 'double', 'default': 0.0}, 'from_upper': {'type': 'boolean', 'default': False}, + 'spicy_id': {'type': 'int', 'default': -1}, + 'spicy_class': {'type': 'string', 'default': 'Unknown'}, } fink_nested_cols = {} @@ -107,7 +109,7 @@ def load_all_cols(): >>> root_level, candidates, images, fink_cols, fink_nested_cols = load_all_cols() >>> out = {**root_level, **candidates, **images, **fink_cols, **fink_nested_cols} >>> print(len(out)) - 156 + 158 """ fink_cols, fink_nested_cols = load_fink_cols() @@ -318,7 +320,7 @@ def load_ztf_index_cols(): -------- >>> out = load_ztf_index_cols() >>> print(len(out)) - 81 + 83 """ # From `root` or `candidates.` common = [ diff --git a/fink_broker/science.py b/fink_broker/science.py index dd3d9fab..e81ca292 100644 --- a/fink_broker/science.py +++ b/fink_broker/science.py @@ -245,6 +245,37 @@ def apply_science_modules(df: DataFrame, noscience: bool = False) -> DataFrame: # see https://github.com/astrolabsoftware/fink-broker/issues/787 df = df.withColumnRenamed('Type', 'vsx') + 
+ _LOG.info("New processor: SPICY (1.2 arcsec)") + df = xmatch_cds( + df, + catalogname="vizier:J/ApJS/254/33/table1", + distmaxarcsec=1.2, + cols_out=['SPICY', 'class'], + types=['int', 'string'] + ) + # Rename `SPICY` to `spicy_id`. Values are numbers or null + df = df.withColumnRenamed('SPICY', 'spicy_id') + # Replace null with the sentinel value -1 + df = df.withColumn( + 'spicy_id', + F.when( + df['spicy_id'].isNull(), + F.lit(-1) + ).otherwise(df['spicy_id']) + ) + + # Rename `class` to `spicy_class`. Values are: + # Unknown, FS, ClassI, ClassII, ClassIII, or 'nan' + df = df.withColumnRenamed('class', 'spicy_class') + # Replace the string 'nan' with 'Unknown' + df = df.withColumn( + 'spicy_class', + F.when( + df['spicy_class'] == 'nan', + F.lit('Unknown') + ).otherwise(df['spicy_class']) + ) + _LOG.info("New processor: GCVS (1.5 arcsec)") df = df.withColumn( 'gcvs',