1+ from network_security .exceptions .exception import NetworkSecurityException
2+ from network_security .logging .logger import logging
3+ from network_security .utils .ml_utils .model .estimator import NetworkModel
4+ from network_security .utils .ml_utils .metric .classification_metric import get_classification_score
5+ import network_security .constants .training_pipeline as tp
6+ from network_security .entity .config_entity import (
7+ DataTransformationConfig ,
8+ ModelTrainerConfig
9+ )
10+ from network_security .entity .artifact_entity import (
11+ DataTransformationArtifact ,
12+ ModelTrainerArtifact
13+ )
14+ from network_security .utils .main_utils .utils import (
15+ save_object ,
16+ load_object ,
17+ load_numpy_array ,
18+ evaluate_models
19+ )
20+
21+ from sklearn .linear_model import LogisticRegression
22+ from sklearn .neighbors import KNeighborsClassifier
23+ from sklearn .tree import DecisionTreeClassifier
24+ from sklearn .ensemble import (
25+ AdaBoostClassifier ,
26+ GradientBoostingClassifier ,
27+ RandomForestClassifier ,
28+ )
29+ from xgboost import XGBClassifier
30+ from sklearn .metrics import r2_score
31+
32+ import pandas as pd
33+ import numpy as np
34+ import os , sys
35+
36+
class ModelTrainer:
    """Select, evaluate and persist the best classifier for the pipeline.

    The trainer reads the transformed train/test arrays referenced by the
    data-transformation artifact, evaluates a fixed set of candidate
    classifiers through ``evaluate_models``, wraps the winner together with
    the stored preprocessor in a ``NetworkModel`` and saves it to the path
    given by the trainer config.
    """

    def __init__(self, model_trainer_config: ModelTrainerConfig,
                 data_transformation_artifact: DataTransformationArtifact):
        """Store the configuration and the upstream artifact.

        Args:
            model_trainer_config: Supplies ``trained_model_file_path``.
            data_transformation_artifact: Supplies the transformed train/test
                file paths and the path of the fitted preprocessor object.
        """
        self.model_trainer_config = model_trainer_config
        self.data_transformation_artifact = data_transformation_artifact

    @staticmethod
    def _build_models() -> dict:
        """Return freshly constructed candidate estimators keyed by name."""
        return {
            "LogisticRegression": LogisticRegression(max_iter=1000),
            "KNeighborsClassifier": KNeighborsClassifier(),
            "DecisionTreeClassifier": DecisionTreeClassifier(),
            "RandomForestClassifier": RandomForestClassifier(),
            "AdaBoostClassifier": AdaBoostClassifier(),
            "GradientBoostingClassifier": GradientBoostingClassifier(),
            "XGBClassifier": XGBClassifier(),
        }

    @staticmethod
    def _build_param_grid() -> dict:
        """Return the hyperparameter search grid keyed by model name.

        Keys match those of ``_build_models``. Commented-out entries are
        additional candidates that are currently disabled.
        """
        return {
            "DecisionTreeClassifier": {
                "criterion": ['gini', 'entropy'],
                # "splitter": ['best', 'random'],
                # "max_features": ['sqrt', 'log2', None],
                # "max_depth": [3, 5, 10, 15, 20, None]
            },
            "RandomForestClassifier": {
                # "criterion": ['gini', 'entropy', "log_loss"],
                # "max_features": ['sqrt', 'log2', None],
                "n_estimators": [50, 100, 200],
                "max_depth": [3, 5, 10, 15, 20, None],
            },
            "GradientBoostingClassifier": {
                "loss": ['log_loss', 'exponential'],
                # "learning_rate": [0.1, 0.01, 0.001, 0.05],
                # "subsample": [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                # "criterion": ['friedman_mse', 'squared_error'],
                # "max_features": ['sqrt', 'log2', None],
                "n_estimators": [50, 100, 200],
                # "max_depth": [3, 5, 10]
            },
            "LogisticRegression": {
                # "penalty": ['l1', 'l2', 'elasticnet', None],
                # "C": [0.01, 0.1, 1.0, 10.0, 100.0],
                # "solver": ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
                "max_iter": [100, 200, 500],
            },
            "KNeighborsClassifier": {
                "n_neighbors": [3, 5, 7, 9, 11],
                # "weights": ['uniform', 'distance'],
                # "algorithm": ['auto', 'ball_tree', 'kd_tree', 'brute'],
                # "p": [1, 2],
                # "leaf_size": [10, 20, 30, 40, 50]
            },
            "AdaBoostClassifier": {
                "n_estimators": [50, 100, 200],
                "learning_rate": [0.1, 0.01, 0.001, 0.05, 1.0],
                # "algorithm": ['SAMME', 'SAMME.R']
            },
            "XGBClassifier": {
                "n_estimators": [50, 100, 200],
                # "learning_rate": [0.1, 0.01, 0.001, 0.05],
                # "max_depth": [3, 5, 7, 9],
                # "gamma": [0, 0.1, 0.2],
                # "subsample": [0.6, 0.7, 0.8, 0.9]
            },
        }

    def train_model(self, X_train, y_train, X_test, y_test) -> ModelTrainerArtifact:
        """Evaluate all candidates, persist the best one and report metrics.

        Args:
            X_train: Training feature array.
            y_train: Training target array.
            X_test: Test feature array.
            y_test: Test target array.

        Returns:
            A ``ModelTrainerArtifact`` holding the saved model path and the
            train/test classification metrics of the selected model.

        Raises:
            NetworkSecurityException: Wraps any error raised during
                evaluation, prediction or persistence.
        """
        try:
            logging.info("Training the model")

            # initializing models and their hyperparameter search grid
            models = self._build_models()
            params = self._build_param_grid()

            # NOTE(review): the code below calls predict() on the estimator
            # taken from `models` without fitting it here, so it relies on
            # evaluate_models fitting the estimators in place -- confirm
            # against main_utils.
            model_report: dict = evaluate_models(
                X_train=X_train,
                y_train=y_train,
                X_test=X_test,
                y_test=y_test,
                models=models,
                params=params,
            )

            # picking the entry with the highest reported score
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]
            logging.info(f"Best model: {best_model_name} with score: {best_model_score}")

            # getting the train classification metrics
            y_train_pred = best_model.predict(X_train)
            train_classification_metric = get_classification_score(
                y_true=y_train, y_pred=y_train_pred
            )

            # tracking the MLFlow

            # getting the test classification metrics
            y_test_pred = best_model.predict(X_test)
            test_classification_metric = get_classification_score(
                y_true=y_test, y_pred=y_test_pred
            )

            # loading the preprocessor and making sure the target directory exists
            preprocessor = load_object(
                file_path=self.data_transformation_artifact.transformation_object_path
            )
            trained_model_path = self.model_trainer_config.trained_model_file_path
            os.makedirs(os.path.dirname(trained_model_path), exist_ok=True)

            # saving preprocessor + model as a single object
            network_model = NetworkModel(preprocessor=preprocessor, model=best_model)
            save_object(file_path=trained_model_path, obj=network_model)

            # building the model trainer artifact
            return ModelTrainerArtifact(
                trained_model_file_path=trained_model_path,
                train_metric_artifact=train_classification_metric,
                test_metric_artifact=test_classification_metric,
            )
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Load the transformed arrays and run ``train_model`` on them.

        The last column of each loaded array is used as the target and all
        preceding columns as input features.

        Returns:
            The ``ModelTrainerArtifact`` produced by ``train_model``.

        Raises:
            NetworkSecurityException: Wraps any error raised while loading
                the arrays or training.
        """
        try:
            logging.info("Initiating model trainer")
            train_file_path = self.data_transformation_artifact.transformed_train_file_path
            test_file_path = self.data_transformation_artifact.transformed_test_file_path

            # loading the training and testing arrays
            training_array = load_numpy_array(file_path=train_file_path)
            testing_array = load_numpy_array(file_path=test_file_path)

            # splitting the arrays into input features and target (last column)
            X_train, y_train = training_array[:, :-1], training_array[:, -1]
            X_test, y_test = testing_array[:, :-1], testing_array[:, -1]

            # training, selecting and persisting the best model
            model_trainer_artifact = self.train_model(X_train, y_train, X_test, y_test)

            logging.info("Model training completed")
            return model_trainer_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e
0 commit comments