From 7d019f81135f77c2b46d416d357e2a4e278d9e8d Mon Sep 17 00:00:00 2001 From: Harshinee Sriram Date: Thu, 30 Jan 2025 12:39:41 -0800 Subject: [PATCH] Update binarization_util.py In the BinBinarizer.fit() method, the line tmp_num_thresholds -= 1 is executed before tmp_num_thresholds is initialized. Hence, if you call the BinBinarizer.fit() method on a DataFrame in which any column contains NaN values, you run into this issue: UnboundLocalError: local variable 'tmp_num_thresholds' referenced before assignment Signed-off-by: Harshinee Sriram --- src/fasterrisk/binarization_util.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/fasterrisk/binarization_util.py b/src/fasterrisk/binarization_util.py index a081736..ac9b9f8 100644 --- a/src/fasterrisk/binarization_util.py +++ b/src/fasterrisk/binarization_util.py @@ -142,7 +142,10 @@ def fit(self, df: pd.DataFrame) -> None: for col_idx in range(len(self.cols)): col = self.cols[col_idx] col_value = df[col] - + + # Initialize tmp_num_thresholds *before* checking for NaNs + tmp_num_thresholds = self.max_num_thresholds_per_feature + if col_value.isnull().sum() > 0: tmp_num_thresholds -= 1 binarizers.append({ # need to keep track of NaN for every column @@ -267,4 +270,4 @@ def transform(self, df: pd.DataFrame) -> tuple: def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame: '''fit and transform on same dataframe''' self.fit(df) - return self.transform(df) \ No newline at end of file + return self.transform(df)