Skip to content

Commit 4ccfc2c

Browse files
committed
finished training pipeline, will build app.py to trigger it
1 parent a6504e8 commit 4ccfc2c

File tree

8 files changed

+139
-9
lines changed

8 files changed

+139
-9
lines changed

app.py

Whitespace-only changes.

final_model/model.pkl

470 KB
Binary file not shown.

final_model/transformer.pkl

2.28 MB
Binary file not shown.

network_security/components/data_transformation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ def initiate_data_transformation(self) -> DataTransformationArtifact:
7777
save_numpy_array(self.data_transformation_config.transformed_test_file_path, array=test_nparray)
7878
save_object(self.data_transformation_config.transformed_object_file_path, obj=knn_processor_obj)
7979

80+
# saving the final transformation object in the common directory alongside model.pkl
81+
save_object("final_model/transformer.pkl", obj=knn_processor_obj)
82+
8083
# preparing artifacts
8184
data_transformation_artifact = DataTransformationArtifact(
8285
transformation_object_path = self.data_transformation_config.transformed_object_file_path,

network_security/components/model_trainer.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
)
1010
from network_security.entity.artifact_entity import (
1111
DataTransformationArtifact,
12-
ModelTrainerArtifact
12+
ModelTrainerArtifact,
13+
ClassificationMetricArtifact
1314
)
1415
from network_security.utils.main_utils.utils import (
1516
save_object,
@@ -28,11 +29,16 @@
2829
)
2930
from xgboost import XGBClassifier
3031
from sklearn.metrics import r2_score
32+
import mlflow
33+
import dagshub
3134

3235
import pandas as pd
3336
import numpy as np
3437
import os, sys
3538

39+
import dagshub
40+
dagshub.init(repo_owner='pycoder49', repo_name='networkSecuritySystem', mlflow=True)
41+
3642

3743
class ModelTrainer:
3844
def __init__(self, model_trainer_config: ModelTrainerConfig,
@@ -43,7 +49,24 @@ def __init__(self, model_trainer_config: ModelTrainerConfig,
4349
except Exception as e:
4450
raise NetworkSecurityException(e, sys)
4551

52+
def track_mlflow(self, model,
                 train_metric: ClassificationMetricArtifact
                 ):
    """Record one set of classification metrics and the model in a new MLflow run.

    Args:
        model: The fitted estimator handed to ``mlflow.sklearn.log_model``.
        train_metric: Metric artifact whose ``f1_score``, ``precision_score``
            and ``recall_score`` attributes are logged.

    Raises:
        NetworkSecurityException: Wraps any exception raised while reading
            the metrics or talking to MLflow.
    """
    try:
        with mlflow.start_run():
            # Read every metric up front, then log them in a fixed order.
            logged_metrics = (
                ("F1_Score", train_metric.f1_score),
                ("Precision", train_metric.precision_score),
                ("Recall", train_metric.recall_score),
            )
            for metric_name, metric_value in logged_metrics:
                mlflow.log_metric(metric_name, metric_value)

            # Store the estimator in the same run under the name "model".
            mlflow.sklearn.log_model(model, "model")

    except Exception as e:
        raise NetworkSecurityException(e, sys)
4770

4871
def train_model(self, X_train, y_train, X_test, y_test) -> object:
4972
try:
@@ -124,15 +147,15 @@ def train_model(self, X_train, y_train, X_test, y_test) -> object:
124147
best_model_score = max(sorted(model_report.values()))
125148
best_model = models[best_model_name]
126149

150+
# getting the train classification metrics and tracking using MLFlow
127151
y_train_pred = best_model.predict(X_train)
128152
train_classification_metric = get_classification_score(y_true=y_train, y_pred=y_train_pred)
153+
self.track_mlflow(best_model, train_classification_metric)
129154

130-
# tracking the MLFlow
131-
132-
133-
# getting the test classification metrics
155+
# getting the test classification metrics and tracking using MLFlow
134156
y_test_pred = best_model.predict(X_test)
135157
test_classification_metric = get_classification_score(y_true=y_test, y_pred=y_test_pred)
158+
self.track_mlflow(best_model, test_classification_metric)
136159

137160
# loading the object, saving it
138161
preprocessor = load_object(file_path=self.data_transformation_artifact.transformation_object_path)
@@ -146,6 +169,9 @@ def train_model(self, X_train, y_train, X_test, y_test) -> object:
146169
obj=network_model
147170
)
148171

172+
# saving the final best model in a common directory with transformer.pkl
173+
save_object("final_model/model.pkl", best_model)
174+
149175
# saving the model trainer artifact
150176
model_trainer_artifact = ModelTrainerArtifact(
151177
trained_model_file_path=self.model_trainer_config.trained_model_file_path,

network_security/pipeline/batch_prediction.py

Whitespace-only changes.
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
from network_security.components.data_ingestion import DataIngestion
2+
from network_security.components.data_validation import DataValidation
3+
from network_security.components.data_transformation import DataTransformation
4+
from network_security.components.model_trainer import ModelTrainer
5+
6+
from network_security.entity.config_entity import (
7+
TrainingPipelineConfig,
8+
DataIngestionConfig,
9+
DataValidationConfig,
10+
DataTransformationConfig,
11+
ModelTrainerConfig
12+
)
13+
14+
from network_security.entity.artifact_entity import (
15+
DataIngestionArtifact,
16+
DataValidationArtifact,
17+
DataTransformationArtifact,
18+
ModelTrainerArtifact
19+
)
20+
21+
from network_security.exceptions.exception import NetworkSecurityException
22+
from network_security.logging.logger import logging
23+
24+
import os, sys
25+
26+
class TrainingPipeline:
27+
def __init__(self):
28+
self.training_pipeline_config = TrainingPipelineConfig()
29+
30+
def start_data_ingestion(self) -> DataIngestionArtifact:
31+
try:
32+
self.data_ingestion_config = DataIngestionConfig(self.training_pipeline_config)
33+
logging.info("Starting data ingestion")
34+
35+
data_ingestion = DataIngestion(data_ingestion_config=self.data_ingestion_config)
36+
data_ingestion_artifact = data_ingestion.initiate_data_ingestion()
37+
logging.info("Data ingestion completed")
38+
39+
return data_ingestion_artifact
40+
except Exception as e:
41+
raise NetworkSecurityException(e, sys)
42+
43+
def start_data_validation(self, data_ingestion_artifact: DataIngestionArtifact) -> DataValidationArtifact:
44+
try:
45+
self.data_validation_config = DataValidationConfig(self.training_pipeline_config)
46+
logging.info("Starting data validation")
47+
48+
data_validation = DataValidation(
49+
data_ingestion_artifact=data_ingestion_artifact,
50+
data_validation_config=self.data_validation_config
51+
)
52+
data_validation_artifact = data_validation.initiate_data_validation()
53+
logging.info("Data validation completed")
54+
55+
return data_validation_artifact
56+
except Exception as e:
57+
raise NetworkSecurityException(e, sys)
58+
59+
def start_data_transformation(self, data_validation_artifact: DataValidationArtifact) -> DataTransformationArtifact:
60+
try:
61+
self.data_transofmation_config = DataTransformationConfig(self.training_pipeline_config)
62+
logging.info("Starting data transformation")
63+
64+
data_transformation = DataTransformation(
65+
data_validation_artifact=data_validation_artifact,
66+
data_transformation_config=self.data_transformation_config
67+
)
68+
data_transformation_artifact = data_transformation.initiate_data_transformation()
69+
logging.info("Data transformation completed")
70+
71+
return data_transformation_artifact
72+
except Exception as e:
73+
raise NetworkSecurityException(e, sys)
74+
75+
def start_model_trainer(self, data_transformation_artifact: DataTransformationArtifact) -> ModelTrainerArtifact:
76+
try:
77+
self.model_trainer_config = ModelTrainerConfig(self.training_pipeline_config)
78+
logging.info("Starting model trainer")
79+
80+
model_trainer = ModelTrainer(
81+
model_trainer_config=self.model_trainer_config,
82+
data_transformation_artifact=data_transformation_artifact
83+
)
84+
model_trainer_artifact = model_trainer.initiate_model_trainer()
85+
logging.info("Model trainer completed")
86+
87+
return model_trainer_artifact
88+
except Exception as e:
89+
raise NetworkSecurityException(e, sys)
90+
91+
def run_pipeline(self):
92+
try:
93+
data_ingestion_artifact = self.start_data_ingestion()
94+
data_validation_artifact = self.start_data_validation(data_ingestion_artifact=data_ingestion_artifact)
95+
data_transformation_artifact = self.start_data_transformation(data_validation_artifact=data_validation_artifact)
96+
model_trainer_artifact = self.start_model_trainer(data_transformation_artifact=data_transformation_artifact)
97+
98+
except Exception as e:
99+
raise NetworkSecurityException(e, sys)

requirements.txt

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
python-dotenv
22
pandas
3-
numpy
4-
pymongo
5-
certifi
3+
numpy>=1.24.0
64
pymongo[srv]==3.12
7-
pymongo
5+
certifi
86
scikit-learn
7+
mlflow<2.16
8+
dagshub
99
dill
1010
pyaml
11+
xgboost
12+
scipy
1113
# -e .

0 commit comments

Comments
 (0)