settadev · KevinMusgrave · Mar 10, 2025 · Mar 8, 2025 · Mar 8, 2025 · Mar 8, 2025
diff --git a/sapientml_automl/.gitignore b/sapientml_automl/.gitignore
@@ -0,0 +1 @@
+outputs*
diff --git a/sapientml_automl/setta_files/setta.db b/sapientml_automl/setta_files/setta.db
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/Global [email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/Global [email protected]
@@ -0,0 +1,4 @@
+root_url: "https://github.com/sapientml/sapientml/files/"
+train_path: f"{root_url}12617021/train_hotelcancel-prediction.csv"
+test_path: f"{root_url}12617033/test_hotelcancel-prediction.csv"
+target_column: "Status"
diff --git a/...entml_automl/setta_files/setta_export/with_variants/hotel/Global [email protected] b/...entml_automl/setta_files/setta_export/with_variants/hotel/Global [email protected]
@@ -0,0 +1,4 @@
+root_url: "https://github.com/sapientml/sapientml/files/"
+train_path: f"{root_url}12617660/train_medical-insurance-prediction.csv"
+test_path: f"{root_url}12617696/test_medical-insurance-prediction.csv"
+target_column: "charges"
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,13 @@
+sml.fit
+---
+training_data: train_data_split[0]
+ignore_columns: ["No."]
+output_dir: f"outputs_{version}_{int(train_size*100)}"
+
+---
+validation_data: 
+test_data: 
+save_datasets_format: 
+csv_encoding: 
+csv_delimiter: 
+codegen_only: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,13 @@
+sml.fit
+---
+training_data: train_data_split[0]
+ignore_columns: 
+output_dir: f"outputs_{version}_{int(train_size*100)}"
+
+---
+validation_data: 
+test_data: 
+save_datasets_format: 
+csv_encoding: 
+csv_delimiter: 
+codegen_only: 
diff --git a/...entml_automl/setta_files/setta_export/with_variants/hotel/resonant_tomato@innocent_tan.py b/...entml_automl/setta_files/setta_export/with_variants/hotel/resonant_tomato@innocent_tan.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+def evaluate(sml, data, target_column, score_fn_info):
+    """Print score_fn_info["fn"](y_true, y_pred, **remaining entries) for sml.predict(data); inputs stay unmodified."""
+    y_true = data[target_column].reset_index(drop=True)
+    features = data.drop(columns=[target_column])  # non-inplace: the caller's frame keeps its target column
+    y_pred = sml.predict(features)[target_column].rename(f"{target_column}_pred")
+    score_kwargs = dict(score_fn_info)  # shallow copy so "fn" is still in the caller's dict on the next call
+    score_fn = score_kwargs.pop("fn")
+    print(f"Score: {score_fn(y_true, y_pred, **score_kwargs)}")
+
+def train_and_evaluate():
+    version = ($Global Variables$version).split("@")[1]  # keep the segment that follows the first "@"
+    $SETTA_GENERATED_PYTHON
+    evaluate(sml, test_data, target_column, score_fn_info)  # NOTE(review): these names are not defined in this file; presumably bound by the code substituted on the previous line - confirm
+
+
+train_and_evaluate()
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,2 @@
+fn: sklearn.metrics.f1_score
+pos_label: "C"
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1 @@
+fn: sklearn.metrics.r2_score
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,18 @@
+sapientml.SapientML
+---
+n_models: 5
+hyperparameter_tuning: true
+hyperparameter_tuning_n_trials: 10
+
+---
+target_columns: [target_column]
+task_type: 
+adaptation_metric: 
+split_method: 
+split_seed: 
+split_train_size: 
+split_column_name: 
+time_split_num: 
+time_split_index: 
+split_stratification: 
+model_type: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,18 @@
+sapientml.SapientML
+---
+n_models: 3
+hyperparameter_tuning: true
+hyperparameter_tuning_n_trials: 30
+
+---
+target_columns: [target_column]
+task_type: 
+adaptation_metric: 
+split_method: 
+split_seed: 
+split_train_size: 
+split_column_name: 
+time_split_num: 
+time_split_index: 
+split_stratification: 
+model_type: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/test_data@decent_lime.yaml b/sapientml_automl/setta_files/setta_export/with_variants/hotel/test_data@decent_lime.yaml
@@ -0,0 +1,52 @@
+pd.read_csv
+---
+
+---
+filepath_or_buffer: test_path
+sep: 
+delimiter: 
+header: 
+names: 
+index_col: 
+usecols: 
+dtype: 
+engine: 
+converters: 
+true_values: 
+false_values: 
+skipinitialspace: 
+skiprows: 
+skipfooter: 
+nrows: 
+na_values: 
+na_filter: 
+verbose: 
+skip_blank_lines: 
+parse_dates: 
+infer_datetime_format: 
+keep_date_col: 
+date_parser: 
+date_format: 
+dayfirst: 
+cache_dates: 
+iterator: 
+chunksize: 
+compression: 
+thousands: 
+decimal: 
+lineterminator: 
+quotechar: 
+quoting: 
+doublequote: 
+escapechar: 
+comment: 
+encoding: 
+encoding_errors: 
+dialect: 
+on_bad_lines: 
+delim_whitespace: 
+low_memory: 
+memory_map: 
+float_precision: 
+storage_options: 
+dtype_backend: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/train_data@aware_coral.yaml b/sapientml_automl/setta_files/setta_export/with_variants/hotel/train_data@aware_coral.yaml
@@ -0,0 +1,52 @@
+pd.read_csv
+---
+
+---
+filepath_or_buffer: train_path
+sep: 
+delimiter: 
+header: 
+names: 
+index_col: 
+usecols: 
+dtype: 
+engine: 
+converters: 
+true_values: 
+false_values: 
+skipinitialspace: 
+skiprows: 
+skipfooter: 
+nrows: 
+na_values: 
+na_filter: 
+verbose: 
+skip_blank_lines: 
+parse_dates: 
+infer_datetime_format: 
+keep_date_col: 
+date_parser: 
+date_format: 
+dayfirst: 
+cache_dates: 
+iterator: 
+chunksize: 
+compression: 
+thousands: 
+decimal: 
+lineterminator: 
+quotechar: 
+quoting: 
+doublequote: 
+escapechar: 
+comment: 
+encoding: 
+encoding_errors: 
+dialect: 
+on_bad_lines: 
+delim_whitespace: 
+low_memory: 
+memory_map: 
+float_precision: 
+storage_options: 
+dtype_backend: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,10 @@
+sklearn.model_selection.train_test_split
+---
+train_size: 0.8
+
+---
+arrays: [train_data]
+test_size: 
+random_state: 
+shuffle: 
+stratify: 
diff --git a/...entml_automl/setta_files/setta_export/with_variants/hotel/[email protected] b/...entml_automl/setta_files/setta_export/with_variants/hotel/[email protected]
@@ -0,0 +1,10 @@
+sklearn.model_selection.train_test_split
+---
+train_size: 0.6
+
+---
+arrays: [train_data]
+test_size: 
+random_state: 
+shuffle: 
+stratify: 
diff --git a/...entml_automl/setta_files/setta_export/with_variants/insurance/Global [email protected] b/...entml_automl/setta_files/setta_export/with_variants/insurance/Global [email protected]
@@ -0,0 +1,4 @@
+root_url: "https://github.com/sapientml/sapientml/files/"
+train_path: f"{root_url}12617021/train_hotelcancel-prediction.csv"
+test_path: f"{root_url}12617033/test_hotelcancel-prediction.csv"
+target_column: "Status"
diff --git a/...l_automl/setta_files/setta_export/with_variants/insurance/Global [email protected] b/...l_automl/setta_files/setta_export/with_variants/insurance/Global [email protected]
@@ -0,0 +1,4 @@
+root_url: "https://github.com/sapientml/sapientml/files/"
+train_path: f"{root_url}12617660/train_medical-insurance-prediction.csv"
+test_path: f"{root_url}12617696/test_medical-insurance-prediction.csv"
+target_column: "charges"
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected]
@@ -0,0 +1,13 @@
+sml.fit
+---
+training_data: train_data_split[0]
+ignore_columns: ["No."]
+output_dir: f"outputs_{version}_{int(train_size*100)}"
+
+---
+validation_data: 
+test_data: 
+save_datasets_format: 
+csv_encoding: 
+csv_delimiter: 
+codegen_only: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected]
@@ -0,0 +1,13 @@
+sml.fit
+---
+training_data: train_data_split[0]
+ignore_columns: 
+output_dir: f"outputs_{version}_{int(train_size*100)}"
+
+---
+validation_data: 
+test_data: 
+save_datasets_format: 
+csv_encoding: 
+csv_delimiter: 
+codegen_only: 
diff --git a/...l_automl/setta_files/setta_export/with_variants/insurance/resonant_tomato@innocent_tan.py b/...l_automl/setta_files/setta_export/with_variants/insurance/resonant_tomato@innocent_tan.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+def evaluate(sml, data, target_column, score_fn_info):
+    """Print score_fn_info["fn"](y_true, y_pred, **remaining entries) for sml.predict(data); inputs stay unmodified."""
+    y_true = data[target_column].reset_index(drop=True)
+    features = data.drop(columns=[target_column])  # non-inplace: the caller's frame keeps its target column
+    y_pred = sml.predict(features)[target_column].rename(f"{target_column}_pred")
+    score_kwargs = dict(score_fn_info)  # shallow copy so "fn" is still in the caller's dict on the next call
+    score_fn = score_kwargs.pop("fn")
+    print(f"Score: {score_fn(y_true, y_pred, **score_kwargs)}")
+
+def train_and_evaluate():
+    version = ($Global Variables$version).split("@")[1]  # keep the segment that follows the first "@"
+    $SETTA_GENERATED_PYTHON
+    evaluate(sml, test_data, target_column, score_fn_info)  # NOTE(review): these names are not defined in this file; presumably bound by the code substituted on the previous line - confirm
+
+
+train_and_evaluate()
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected]
@@ -0,0 +1,2 @@
+fn: sklearn.metrics.f1_score
+pos_label: "C"
diff --git a/...ntml_automl/setta_files/setta_export/with_variants/insurance/[email protected] b/...ntml_automl/setta_files/setta_export/with_variants/insurance/[email protected]
@@ -0,0 +1 @@
+fn: sklearn.metrics.r2_score
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected]
@@ -0,0 +1,18 @@
+sapientml.SapientML
+---
+n_models: 5
+hyperparameter_tuning: true
+hyperparameter_tuning_n_trials: 10
+
+---
+target_columns: [target_column]
+task_type: 
+adaptation_metric: 
+split_method: 
+split_seed: 
+split_train_size: 
+split_column_name: 
+time_split_num: 
+time_split_index: 
+split_stratification: 
+model_type: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected] b/sapientml_automl/setta_files/setta_export/with_variants/insurance/[email protected]
@@ -0,0 +1,18 @@
+sapientml.SapientML
+---
+n_models: 3
+hyperparameter_tuning: true
+hyperparameter_tuning_n_trials: 30
+
+---
+target_columns: [target_column]
+task_type: 
+adaptation_metric: 
+split_method: 
+split_seed: 
+split_train_size: 
+split_column_name: 
+time_split_num: 
+time_split_index: 
+split_stratification: 
+model_type: 
diff --git a/sapientml_automl/setta_files/setta_export/with_variants/insurance/test_data@decent_lime.yaml b/sapientml_automl/setta_files/setta_export/with_variants/insurance/test_data@decent_lime.yaml
@@ -0,0 +1,52 @@
+pd.read_csv
+---
+
+---
+filepath_or_buffer: test_path
+sep: 
+delimiter: 
+header: 
+names: 
+index_col: 
+usecols: 
+dtype: 
+engine: 
+converters: 
+true_values: 
+false_values: 
+skipinitialspace: 
+skiprows: 
+skipfooter: 
+nrows: 
+na_values: 
+na_filter: 
+verbose: 
+skip_blank_lines: 
+parse_dates: 
+infer_datetime_format: 
+keep_date_col: 
+date_parser: 
+date_format: 
+dayfirst: 
+cache_dates: 
+iterator: 
+chunksize: 
+compression: 
+thousands: 
+decimal: 
+lineterminator: 
+quotechar: 
+quoting: 
+doublequote: 
+escapechar: 
+comment: 
+encoding: 
+encoding_errors: 
+dialect: 
+on_bad_lines: 
+delim_whitespace: 
+low_memory: 
+memory_map: 
+float_precision: 
+storage_options: 
+dtype_backend: