Skip to content

Commit 01e6d90

Browse files
Elasticc round two! (#705)
* Install fink-science from URL
* Update deps [skip-ci]
* Update args for active learning in SNN
* Remove T2 for elasticc
* Remove T2 from the elasticc distribution
* Test fink science v4 installation
* Update CATS syntax
* Add SLSN
* Add new args when extracting features (#704)
* Update taxonomy
* Well string literals can be duplicated in my ideal world
* Well string literals can be duplicated in my ideal world
* Remove hardcoded installation of fink-science
* Fake t2 for test purposes (t2 is not compatible with tensorflow 2.9)
* Add key in the payload when writing elasticc data
* Replace key by topic
* PEP8
* Bump to 3.0
* Update requirements for tensorflow (fink-science 4.2+)
1 parent 7ae58e5 commit 01e6d90

File tree

8 files changed

+58
-48
lines changed

8 files changed

+58
-48
lines changed

.github/workflows/sonarqube.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ jobs:
2121
-Dsonar.organization=astrolabsoftware
2222
-Dsonar.projectKey=finkbroker
2323
-Dsonar.sources=fink_broker/,bin/
24+
-Dsonar.issue.ignore.multicriteria.j1.ruleKey=python:S107,python:S1192
2425
-Dsonar.test.exclusions=fink_broker/htmlcov,fink_broker/slackUtils.py
2526
-Dsonar.verbose=true
2627
-Dsonar.coverage.exclusions=**/**

bin/distribute_elasticc.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def format_df_to_elasticc(df):
8383
df['cats_fine_max_prob'].astype('float'),
8484
df['rf_snia_vs_nonia'].astype('float'),
8585
1.0 - df['rf_snia_vs_nonia'].astype('float'),
86-
df['t2_broad_max_prob'].astype('float'),
8786
)
8887
).withColumn(
8988
'classes',
@@ -96,7 +95,6 @@ def format_df_to_elasticc(df):
9695
df['cats_fine_class'].astype('int'),
9796
F.lit(111), # EarlySN
9897
F.lit(0), # EarlySN Others
99-
df['t2_broad_class'].astype('int')
10098
)
10199
).withColumn(
102100
'classifications',
@@ -149,12 +147,6 @@ def format_df_to_elasticc(df):
149147
F.col("classes").getItem(7),
150148
F.col("scores").getItem(7)
151149
),
152-
F.struct(
153-
F.lit('T2 classifier'),
154-
F.lit('version 1.0'),
155-
F.col("classes").getItem(8),
156-
F.col("scores").getItem(8)
157-
),
158150
).cast(classifications_schema)
159151
).drop("scores").drop("classes")
160152

bin/stream2raw.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ def main():
8989
alert_schema_json = fastavro.schema.to_parsing_canonical_form(schema)
9090
df_decoded = df.select(
9191
[
92-
from_avro(df["value"], alert_schema_json).alias("decoded")
92+
from_avro(df["value"], alert_schema_json).alias("decoded"),
93+
df["topic"]
9394
]
9495
)
9596
elif args.producer == 'ztf':

deps/requirements-science-no-deps.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ torch==1.12.0+cpu
1313
george
1414
imbalanced-learn==0.7.0
1515
optuna==2.3.0
16-
tensorflow==2.8.0
16+
tensorflow==2.9.2

deps/requirements-science.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ torch==1.12.0+cpu
3030
george
3131
imbalanced-learn==0.7.0
3232
optuna==2.3.0
33-
tensorflow==2.8.0
33+
tensorflow==2.9.2

fink_broker/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
15-
__version__ = "2.9"
15+
__version__ = "3.0"

fink_broker/science.py

Lines changed: 51 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from pyspark.sql import DataFrame
1616
from pyspark.sql import functions as F
1717
from pyspark.sql.functions import pandas_udf, PandasUDFType
18-
from pyspark.sql.types import StringType, LongType
18+
from pyspark.sql.types import StringType, LongType, MapType, FloatType
1919

2020
import numpy as np
2121
import pandas as pd
@@ -44,7 +44,8 @@
4444
from fink_science.snn.processor import snn_ia_elasticc, snn_broad_elasticc
4545
from fink_science.cats.processor import predict_nn
4646
from fink_science.agn.processor import agn_elasticc
47-
from fink_science.t2.processor import t2
47+
from fink_science.slsn.processor import slsn_elasticc
48+
# from fink_science.t2.processor import t2
4849

4950
from fink_broker.tester import spark_unit_tests
5051

@@ -144,6 +145,22 @@ def ang2pix_array(ra: pd.Series, dec: pd.Series, nside: pd.Series) -> pd.Series:
144145

145146
return pd.Series(to_return)
146147

148+
@pandas_udf(MapType(StringType(), FloatType()), PandasUDFType.SCALAR)
def fake_t2(incol):
    """Produce a placeholder T2 result with every class probability at zero.

    Stand-in for the real T2 processor. Only for test purposes.

    Parameters
    ----------
    incol: pd.Series
        Any input column. Only its length is used, to emit one
        result per row.

    Returns
    -------
    pd.Series
        One mapping per input row, associating each class label
        with the probability 0.0.
    """
    class_labels = (
        'M-dwarf', 'KN', 'AGN', 'SLSN-I',
        'RRL', 'Mira', 'SNIax', 'TDE',
        'SNIa', 'SNIbc', 'SNIa-91bg',
        'mu-Lens-Single', 'EB', 'SNII',
    )

    # Same label order as the list above, every probability set to 0.0.
    zero_probabilities = dict.fromkeys(class_labels, 0.0)

    # The same mapping object is emitted once per input row.
    n_rows = len(incol)
    return pd.Series([zero_probabilities for _ in range(n_rows)])
163+
147164
def apply_science_modules(df: DataFrame, logger: Logger) -> DataFrame:
148165
"""Load and apply Fink science modules to enrich alert content
149166
@@ -317,9 +334,10 @@ def apply_science_modules(df: DataFrame, logger: Logger) -> DataFrame:
317334
df = df.withColumn('rf_kn_vs_nonkn', knscore(*knscore_args))
318335

319336
logger.info("New processor: T2")
320-
t2_args = ['candid', 'cjd', 'cfid', 'cmagpsf', 'csigmapsf']
321-
t2_args += [F.col('roid'), F.col('cdsxmatch'), F.col('candidate.jdstarthist')]
322-
df = df.withColumn('t2', t2(*t2_args))
337+
# t2_args = ['candid', 'cjd', 'cfid', 'cmagpsf', 'csigmapsf']
338+
# t2_args += [F.col('roid'), F.col('cdsxmatch'), F.col('candidate.jdstarthist')]
339+
# df = df.withColumn('t2', t2(*t2_args))
340+
df = df.withColumn('t2', fake_t2('objectId'))
323341

324342
# Apply level one processor: snad (light curve features)
325343
logger.info("New processor: ad_features")
@@ -406,9 +424,18 @@ def apply_science_modules_elasticc(df: DataFrame, logger: Logger) -> DataFrame:
406424
df = df.withColumn('redshift_err', F.col('diaObject.z_final_err'))
407425

408426
logger.info("New processor: EarlySN")
427+
409428
args = ['cmidPointTai', 'cfilterName', 'cpsFlux', 'cpsFluxErr']
410-
# fake args
411-
args += [F.col('cdsxmatch'), F.lit(20), F.lit(40)]
429+
430+
# fake cdsxmatch and nobs
431+
args += [F.col('cdsxmatch'), F.lit(20)]
432+
args += [F.col('diaObject.ra'), F.col('diaObject.decl')]
433+
args += [F.col('diaObject.hostgal_ra'), F.col('diaObject.hostgal_dec')]
434+
args += [F.col('diaObject.hostgal_zphot')]
435+
args += [F.col('diaObject.hostgal_zphot_err'), F.col('diaObject.mwebv')]
436+
437+
# maxduration
438+
args += [F.lit(40)]
412439
df = df.withColumn('rf_snia_vs_nonia', rfscore_sigmoid_elasticc(*args))
413440

414441
# Apply level one processor: superNNova
@@ -429,7 +456,6 @@ def apply_science_modules_elasticc(df: DataFrame, logger: Logger) -> DataFrame:
429456
df = df.withColumn('preds_snn', snn_broad_elasticc(*args))
430457

431458
mapping_snn = {
432-
-1: 0,
433459
0: 11,
434460
1: 13,
435461
2: 12,
@@ -449,32 +475,17 @@ def apply_science_modules_elasticc(df: DataFrame, logger: Logger) -> DataFrame:
449475
df = df.withColumn('cbpf_preds', predict_nn(*args))
450476

451477
mapping_cats_general = {
452-
-1: 0,
453-
0: 111,
454-
1: 112,
455-
2: 113,
456-
3: 114,
457-
4: 115,
458-
5: 121,
459-
6: 122,
460-
7: 123,
461-
8: 124,
462-
9: 131,
463-
10: 132,
464-
11: 133,
465-
12: 134,
466-
13: 135,
467-
14: 211,
468-
15: 212,
469-
16: 213,
470-
17: 214,
471-
18: 221
478+
0: 11,
479+
1: 12,
480+
2: 13,
481+
3: 21,
482+
4: 22,
472483
}
473484
mapping_cats_general_expr = F.create_map([F.lit(x) for x in chain(*mapping_cats_general.items())])
474485

475-
col_fine_class = F.col('cbpf_preds').getItem(0).astype('int')
476-
df = df.withColumn('cats_fine_class', mapping_cats_general_expr[col_fine_class])
477-
df = df.withColumn('cats_fine_max_prob', F.col('cbpf_preds').getItem(1))
486+
df = df.withColumn('argmax', F.expr('array_position(cbpf_preds, array_max(cbpf_preds)) - 1'))
487+
df = df.withColumn('cats_broad_class', mapping_cats_general_expr[df['argmax']])
488+
df = df.withColumn('cats_broad_max_prob', F.array_max(df['cbpf_preds']))
478489

479490
# AGN
480491
args_forced = [
@@ -485,13 +496,18 @@ def apply_science_modules_elasticc(df: DataFrame, logger: Logger) -> DataFrame:
485496
]
486497
df = df.withColumn('rf_agn_vs_nonagn', agn_elasticc(*args_forced))
487498

488-
# T2
489-
df = df.withColumn('t2_broad_class', F.lit(0))
490-
df = df.withColumn('t2_broad_max_prob', F.lit(0.0))
499+
# SLSN
500+
args_forced = [
501+
'diaObject.diaObjectId', 'cmidPointTai', 'cpsFlux', 'cpsFluxErr', 'cfilterName',
502+
'diaSource.ra', 'diaSource.decl',
503+
'diaObject.hostgal_zphot', 'diaObject.hostgal_zphot_err',
504+
'diaObject.hostgal_ra', 'diaObject.hostgal_dec'
505+
]
506+
df = df.withColumn('rf_slsn_vs_nonslsn', slsn_elasticc(*args_forced))
491507

492508
# Drop temp columns
493509
df = df.drop(*expanded)
494-
df = df.drop(*['preds_snn', 'cbpf_preds', 'redshift', 'redshift_err', 'cdsxmatch', 'roid'])
510+
df = df.drop(*['preds_snn', 'cbpf_preds', 'redshift', 'redshift_err', 'cdsxmatch', 'roid', 'argmax'])
495511

496512
return df
497513

sonar-project.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ sonar.exclusions=fink_broker/htmlcov,fink_broker/slackUtils.py
1010

1111
# Functions, methods and lambdas should not have too many parameters
1212
# Well, they shouldn't.
13-
sonar.issue.ignore.multicriteria.j1.ruleKey=python:S107
13+
sonar.issue.ignore.multicriteria.j1.ruleKey=python:S107,python:S1192
1414
sonar.issue.ignore.multicriteria.j1.resourceKey=**/*.py
1515

1616
# Path to coverage file (need xml)

0 commit comments

Comments
 (0)