From 544d2fac8d668d2a9d2ca11909ff6aab69a9a699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karina=20Bartolom=C3=A9?= Date: Fri, 10 May 2024 00:46:44 -0300 Subject: [PATCH] :notebook: doc --- .../01_calibracion/01_calibration_slides.html | 1951 +-- .../01_calibracion/01_calibration_slides.qmd | 35 +- .../figure-html/cell-27-output-10.png | Bin 0 -> 109828 bytes .../figure-html/cell-27-output-11.png | Bin 0 -> 102873 bytes .../figure-html/cell-27-output-14.png | Bin 0 -> 110361 bytes .../figure-html/cell-27-output-15.png | Bin 0 -> 97933 bytes .../figure-html/cell-27-output-18.png | Bin 0 -> 114949 bytes .../figure-html/cell-27-output-19.png | Bin 0 -> 111400 bytes .../figure-html/cell-27-output-2.png | Bin 0 -> 111702 bytes .../figure-html/cell-27-output-22.png | Bin 0 -> 109215 bytes .../figure-html/cell-27-output-23.png | Bin 0 -> 106229 bytes .../figure-html/cell-27-output-3.png | Bin 0 -> 106467 bytes .../figure-html/cell-27-output-6.png | Bin 0 -> 103712 bytes .../figure-html/cell-27-output-7.png | Bin 0 -> 69313 bytes .../figure-html/cell-30-output-10.png | Bin 0 -> 129956 bytes .../figure-html/cell-30-output-11.png | Bin 0 -> 104461 bytes .../figure-html/cell-30-output-14.png | Bin 0 -> 129955 bytes .../figure-html/cell-30-output-15.png | Bin 0 -> 93712 bytes .../figure-html/cell-30-output-18.png | Bin 0 -> 135626 bytes .../figure-html/cell-30-output-19.png | Bin 0 -> 111801 bytes .../figure-html/cell-30-output-2.png | Bin 0 -> 131043 bytes .../figure-html/cell-30-output-22.png | Bin 0 -> 125842 bytes .../figure-html/cell-30-output-23.png | Bin 0 -> 104826 bytes .../figure-html/cell-30-output-3.png | Bin 0 -> 102741 bytes .../figure-html/cell-30-output-6.png | Bin 0 -> 112670 bytes .../figure-html/cell-30-output-7.png | Bin 0 -> 73025 bytes .../figure-html/fig-credit-tree-output-1.png | Bin 0 -> 108940 bytes .../figure-html/fig-distrib-tree-output-1.png | Bin 0 -> 34113 bytes .../figure-html/fig-log-output-1.png | Bin 0 -> 25694 bytes .../figure-html/fig-reliability-output-1.png | Bin 0 -> 43125 bytes 01_ml/01_clasificacion/01_calibracion/bib.bib | 63 + .../01_calibracion/custom.scss | 302 +- .../01_calibracion/custom_testing.scss | 340 - .../{custom_functions.py => functions.py} | 0 .../01_calibracion/logo-uba.jpeg | Bin 7939 -> 0 bytes .../01_calibracion/testing-stuff.html | 10137 ---------------- .../{ => testing}/testing-stuff.qmd | 8 +- 37 files changed, 1371 insertions(+), 11465 deletions(-) create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-10.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-11.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-14.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-15.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-18.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-19.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-2.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-22.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-23.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-3.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-6.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-27-output-7.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-10.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-11.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-14.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-15.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-18.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-19.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-2.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-22.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-23.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-3.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-6.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/cell-30-output-7.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/fig-credit-tree-output-1.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/fig-distrib-tree-output-1.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/fig-log-output-1.png create mode 100644 01_ml/01_clasificacion/01_calibracion/01_calibration_slides_files/figure-html/fig-reliability-output-1.png create mode 100644 01_ml/01_clasificacion/01_calibracion/bib.bib delete mode 100644 01_ml/01_clasificacion/01_calibracion/custom_testing.scss rename 01_ml/01_clasificacion/01_calibracion/{custom_functions.py => functions.py} (100%) delete mode 100644 01_ml/01_clasificacion/01_calibracion/logo-uba.jpeg delete mode 100644 01_ml/01_clasificacion/01_calibracion/testing-stuff.html rename 01_ml/01_clasificacion/01_calibracion/{ => testing}/testing-stuff.qmd (98%) diff --git a/01_ml/01_clasificacion/01_calibracion/01_calibration_slides.html b/01_ml/01_clasificacion/01_calibracion/01_calibration_slides.html index 7033c3d..9d10401 100644 --- a/01_ml/01_clasificacion/01_calibracion/01_calibration_slides.html +++ b/01_ml/01_clasificacion/01_calibracion/01_calibration_slides.html @@ -6020,7 +6020,27 @@ code span.va { color: #111111; } code span.vs { color: #20794d; } code span.wa { color: #5e5e5e; font-style: italic; } - + +div.csl-bib-body { } +div.csl-entry { +clear: both; +margin-bottom: 0em; +} +.hanging-indent div.csl-entry { +margin-left:2em; +text-indent:-2em; +} +div.csl-left-margin { +min-width:2em; +float:left; +} +div.csl-right-inline { +margin-left:2em; +padding-left:1em; +} +div.csl-indent { +margin-left: 2em; +} @@ -8979,7 +9000,7 @@

2.1 Datos

La variable objetivo (target) es Risk, donde el porcentaje de observaciones de clase 1 (riesgosos) es 30.0%

- +

2.2 Particiones

@@ -9061,50 +9082,50 @@

2.3 Modelo de clasificación simp
-
+

@@ -9159,7 +9180,7 @@

2.3 Modelo de clasificación simp

Ajuste del modelo:

-
+
Código
X_train_subset = X_train[subset_cols].copy()
@@ -9215,7 +9236,7 @@ 

2.3 Modelo de clasificación simp

2.4 Evaluación de un modelo de clasificación

Dados los datos de evaluación, se utiliza el modelo para predecir el valor de la variable objetivo. Mediante predict() se obtienen las predicciones de clase. Con predict_proba()el clasificador devuelve un score (valor entre 0 y 1).

-
+
Ver código
preds = (pd.concat([y_test, X_test], axis=1)
@@ -9257,50 +9278,50 @@ 

2.4 Evaluación de un modelo de c
-
+

@@ -9397,50 +9418,50 @@

2.4 Evaluación de un modelo de c
-
+

@@ -9632,7 +9653,7 @@

3 Comparación de modelos de ML3.1 Preprocesamiento

Muchos modelos de aprendizaje automático requieren que los datos sean numéricos. Para ello, se construye un pipeline de preprocesamiento de variables, diferenciando el procesamiento según tipo de variables: categóricas y numéricas.

🔗 sklearn.preprocessing

-
+
Ver código
numeric_transformer = Pipeline([
@@ -9656,31 +9677,31 @@ 

3.1 Preprocesamiento

], verbose_feature_names_out=False)
-
+
ColumnTransformer(transformers=[('num',
                                  Pipeline(steps=[('impute',
                                                   SimpleImputer(strategy='median')),
                                                  ('scaler', MinMaxScaler())]),
-                                 <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68850>),
+                                 <sklearn.compose._column_transformer.make_column_selector object at 0x14d66f730>),
                                 ('cat',
                                  Pipeline(steps=[('ohe',
                                                   OneHotEncoder(handle_unknown='infrequent_if_exist',
                                                                 min_frequency=0.05,
                                                                 sparse_output=False))]),
-                                 <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68940>)],
+                                 <sklearn.compose._column_transformer.make_column_selector object at 0x14d66c310>)],
                   verbose_feature_names_out=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
@@ -9779,18 +9800,17 @@

3.1 Preprocesamiento

- + - - - - + + + @@ -9799,25 +9819,28 @@

3.1 Preprocesamiento

- + + - - - - + + + + + + @@ -9825,8 +9848,6 @@

3.1 Preprocesamiento

- -
0.570.23 0.670.320.120.01.00.140.36 0.0 1.0 0.0 1.0 0.0 0.01.0 0.0 0.0 0.00.0 0.0 0.00.0 1.0 0.00.00.0
0.140.670.160.570.271.00.470.2 0.0 1.0 0.0 1.0 0.00.0 1.0 0.0 0.0 0.0 0.01.0 0.0 0.0 0.00.0 0.0 0.00.01.0
@@ -9856,13 +9877,13 @@

3.2 Modelado

SimpleImputer(strategy='median')), ('scaler', MinMaxScaler())]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68850>), + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66f730>), ('cat', Pipeline(steps=[('ohe', OneHotEncoder(handle_unknown='infrequent_if_exist', min_frequency=0.05, sparse_output=False))]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68940>)], + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66c310>)], verbose_feature_names_out=False)), ('model', HistGradientBoostingClassifier(max_depth=4, max_iter=1000, @@ -9872,13 +9893,13 @@

3.2 Modelado

SimpleImputer(strategy='median')), ('scaler', MinMaxScaler())]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68850>), + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66f730>), ('cat', Pipeline(steps=[('ohe', OneHotEncoder(handle_unknown='infrequent_if_exist', min_frequency=0.05, sparse_output=False))]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68940>)], + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66c310>)], verbose_feature_names_out=False)), ('model', HistGradientBoostingClassifier(max_depth=4, max_iter=1000, @@ -9886,14 +9907,14 @@

3.2 Modelado

Pipeline(steps=[('impute', SimpleImputer(strategy='median')), ('scaler', MinMaxScaler())]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68850>), + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66f730>), ('cat', Pipeline(steps=[('ohe', OneHotEncoder(handle_unknown='infrequent_if_exist', min_frequency=0.05, sparse_output=False))]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68940>)], - verbose_feature_names_out=False)
<sklearn.compose._column_transformer.make_column_selector object at 0x14ac68850>
SimpleImputer(strategy='median')
MinMaxScaler()
<sklearn.compose._column_transformer.make_column_selector object at 0x14ac68940>
OneHotEncoder(handle_unknown='infrequent_if_exist', min_frequency=0.05,
+                                 <sklearn.compose._column_transformer.make_column_selector object at 0x14d66c310>)],
+                  verbose_feature_names_out=False)
<sklearn.compose._column_transformer.make_column_selector object at 0x14d66f730>
SimpleImputer(strategy='median')
MinMaxScaler()
<sklearn.compose._column_transformer.make_column_selector object at 0x14d66c310>
OneHotEncoder(handle_unknown='infrequent_if_exist', min_frequency=0.05,
               sparse_output=False)
HistGradientBoostingClassifier(max_depth=4, max_iter=1000, random_state=42)
@@ -9903,13 +9924,13 @@

3.2 Modelado

SimpleImputer(strategy='median')), ('scaler', MinMaxScaler())]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68850>), + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66f730>), ('cat', Pipeline(steps=[('ohe', OneHotEncoder(handle_unknown='infrequent_if_exist', min_frequency=0.05, sparse_output=False))]), - <sklearn.compose._column_transformer.make_column_selector object at 0x14ac68940>)], + <sklearn.compose._column_transformer.make_column_selector object at 0x14d66c310>)], verbose_feature_names_out=False)), ('model', DecisionTreeClassifier(random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.