Update columns to push to HBase tables (#673)

* Start to think about data structure
* Update columns to be pushed to HBase
* Do not push features
* PEP8
astrolabsoftware · Jan 25, 2023 · cf41361 · cf41361
1 parent 1188779
commit cf41361
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 3 deletions.
diff --git a/bin/index_archival.py b/bin/index_archival.py
@@ -118,9 +118,13 @@ def main():
         'gcvs',
         'vsx',
         'snn_snia_vs_nonia', 'snn_sn_vs_all', 'rf_snia_vs_nonia',
-        'classtar', 'drb', 'ndethist', 'rf_kn_vs_nonkn', 'tracklet'
+        'classtar', 'drb', 'ndethist', 'rf_kn_vs_nonkn', 'tracklet',
+        'anomaly_score', 'x4lac', 'x3hsp'
     ]
 
+    common_cols += [col_ for col_ in df.columns if col_.startswith('t2_')]
+    common_cols += [col_ for col_ in df.columns if col_.startswith('mangrove_')]
+
     if columns[0].startswith('pixel'):
         nside = int(columns[0].split('pixel')[1])
 

diff --git a/fink_broker/hbaseUtils.py b/fink_broker/hbaseUtils.py
@@ -23,6 +23,9 @@
 from fink_broker import __version__ as fbvsn
 from fink_science import __version__ as fsvsn
 
+from fink_science.t2.utilities import T2_COLS
+from fink_science.xmatch.utils import MANGROVE_COLS
+
 from fink_broker.tester import spark_unit_tests
 
 def load_hbase_data(catalog: str, rowkey: str) -> DataFrame:
@@ -161,7 +164,7 @@ def load_science_portal_column_names():
     --------
     >>> cols_i, cols_d, cols_b = load_science_portal_column_names()
     >>> print(len(cols_d))
-    14
+    35
     """
     # Column family i
     cols_i = [
@@ -188,9 +191,29 @@ def load_science_portal_column_names():
         'Plx',
         'e_Plx',
         'gcvs',
-        'vsx'
+        'vsx',
+        'x4lac',
+        'x3hsp',
+        'anomaly_score'
+    ]
+
+    # mangrove
+    cols_d += [
+        col('mangrove.{}'.format(i)).alias('mangrove_{}'.format(i)) for i in MANGROVE_COLS
     ]
 
+    cols_d += [
+        col('t2.{}'.format(i)).alias('t2_{}'.format(i)) for i in T2_COLS
+    ]
+
+    # cols_d += [
+    #     col('lc_features_g.{}'.format(i)).alias('lc_features_g_{}'.format(i)) for i in FEATURES_COLS
+    # ]
+
+    # cols_d += [
+    #     col('lc_features_r.{}'.format(i)).alias('lc_features_r_{}'.format(i)) for i in FEATURES_COLS
+    # ]
+
     # Column family binary
     cols_b = [
         col('cutoutScience.stampData').alias('cutoutScience_stampData'),