# DDOS attack detection and prevention.R
# Packages: tidyverse attaches readr (read_csv) and ggplot2; kernlab provides
# the SVM backend for caret's "svmPoly" method and ksvm(); e1071 is needed by
# some caret internals; pROC draws the ROC curves at the end of the script.
library(tidyverse)
library(caret)
library(kernlab)
library(e1071)
library(pROC)
set.seed(123)
# Load the dataset
data <- read_csv("C:/Users/user/Downloads/data_bal.csv")
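# Optional sanity check (illustrative, not required): confirm the file loaded
# with the expected number of rows and columns
print(dim(data))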
# Sample 10,000 rows from the dataset (with replacement, so duplicate rows are possible)
data_sampled <- data %>% sample_n(10000, replace = TRUE)
data_sampled$label <- factor(data_sampled$label, levels=c(1,0), labels=c('benign', 'malicious'))
# Remove variables with near-zero variance (guard against nearZeroVar()
# returning an empty index, which would otherwise drop every column)
nzv <- nearZeroVar(data_sampled)
if (length(nzv) > 0) data_sampled <- data_sampled[, -nzv]
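# Optional sanity check (illustrative): report how much the filter removed
cat("Dropped", length(nzv), "near-zero-variance columns;",
    ncol(data_sampled), "columns remain\n")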
# Visualize the number of benign and malicious rows in the dataset
ggplot(data_sampled, aes(label, fill = label)) + geom_bar() +
  ggtitle('Number of benign and malicious rows in dataset')
# Split the data into training and testing sets
set.seed(123)
train_index <- createDataPartition(data_sampled$label, p=0.8, list=FALSE)
train_data <- data_sampled[train_index,]
test_data <- data_sampled[-train_index,]
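# Optional sanity check (illustrative): createDataPartition() stratifies on
# `label`, so the class proportions should be nearly identical in both sets
print(prop.table(table(train_data$label)))
print(prop.table(table(test_data$label)))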
# Set up the tuning grid for the polynomial-kernel SVM
tuning_grid <- expand.grid(degree = 1:3, scale = c(0.1, 1, 10), C = 2^(-2:2))
# Train the model; a multi-row tuning grid requires resampling, so use 5-fold
# cross-validation rather than trainControl(method = "none")
model <- train(label ~ ., data = train_data,
               method = "svmPoly",
               na.action = na.omit,
               preProcess = c("scale", "center"),
               tuneGrid = tuning_grid,
               trControl = trainControl(method = "cv", number = 5))
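# Optional (illustrative): inspect the hyper-parameters selected by
# cross-validation before evaluating on the held-out data
print(model$bestTune)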
# Make predictions using the models
model_training <- predict(model, train_data) # making predictions for training set
model_testing <- predict(model, test_data) # making predictions for testing set
# Model performance (display confusion matrix and stats)
model_training_confusion <- confusionMatrix(model_training, train_data$label)
model_testing_confusion <- confusionMatrix(model_testing, test_data$label)
#print result
print(model_training_confusion)
print(model_testing_confusion)
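# Optional (illustrative): individual statistics can also be pulled out of the
# confusionMatrix object programmatically, e.g. balanced accuracy on the test set
print(model_testing_confusion$byClass['Balanced Accuracy'])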
#Plot the confusion matrix heatmap for the testing set
# Get the confusion matrix for the testing set
conf_mat <- model_testing_confusion$table
# Convert the confusion matrix to long format. In confusionMatrix() output the
# rows are the predicted classes and the columns the actual (reference)
# classes; as.data.frame() on the table yields Prediction/Reference/Freq
# columns directly, which avoids mislabeling the axes.
conf_mat_df_long <- as.data.frame(conf_mat)
# Plot the heatmap
ggplot(conf_mat_df_long, aes(x = Reference, y = Prediction, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(x = "Actual", y = "Predicted", title = "Confusion Matrix Heatmap for Testing Set") +
  theme_minimal()
# Extract accuracy and F1 score for the training set (reusing the stored
# confusionMatrix objects instead of recomputing them)
train_accuracy <- model_training_confusion$overall['Accuracy']
train_f1_score <- model_training_confusion$byClass['F1']
# Extract accuracy and F1 score for the testing set
test_accuracy <- model_testing_confusion$overall['Accuracy']
test_f1_score <- model_testing_confusion$byClass['F1']
# Create a data frame with the metrics
df_metrics <- data.frame(
Metric = c('Accuracy', 'F1 Score'),
Training = c(train_accuracy, train_f1_score),
Testing = c(test_accuracy, test_f1_score)
)
# Reshape data to long format for plotting
df_metrics_long <- df_metrics %>% pivot_longer(cols = c('Training', 'Testing'), names_to = 'Set', values_to = 'Value')
# Plot bar chart
ggplot(df_metrics_long, aes(Metric, Value, fill = Set)) +
geom_col(position = position_dodge()) +
scale_fill_manual(values = c('blue', 'orange')) +
ggtitle('Model Performance Metrics') +
theme_minimal()
# Fit a kernlab SVM on the full dataset (not just the training set) with
# probability estimates enabled, reusing the best hyper-parameters found by
# caret above. Note that ksvm() takes a single value per kernel parameter
# (passed through `kpar`), not a vector of candidates, and that because the
# testing rows are part of this fit, the testing ROC curve will be optimistic.
model_full <- ksvm(label ~ ., data = as.data.frame(data_sampled),
                   kernel = "polydot",
                   kpar = list(degree = model$bestTune$degree,
                               scale = model$bestTune$scale,
                               offset = 1),
                   C = model$bestTune$C,
                   prob.model = TRUE,  # enable probability estimates
                   scaled = TRUE)      # ksvm centres and scales predictors itself
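# Optional (illustrative): printing the ksvm object reports the number of
# support vectors and the training error of the refitted model
print(model_full)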
# Obtain predicted class probabilities for the training and testing sets
# (kernlab's predict() uses type = "probabilities"; the returned matrix has
#  one column per factor level)
model_training_probs <- predict(model_full, as.data.frame(train_data),
                                type = "probabilities")[, "malicious"]
model_testing_probs <- predict(model_full, as.data.frame(test_data),
                               type = "probabilities")[, "malicious"]
# Plot the ROC curves, treating "malicious" as the positive class
roc_train <- roc(train_data$label, model_training_probs,
                 levels = c("benign", "malicious"), direction = "<")
roc_test <- roc(test_data$label, model_testing_probs,
                levels = c("benign", "malicious"), direction = "<")
plot(roc_train, col = "blue", main = "ROC Curve",
     print.auc = TRUE, print.auc.y = 0.45)
plot(roc_test, col = "red", add = TRUE,
     print.auc = TRUE, print.auc.y = 0.35)
legend("bottomright", legend = c("Training Set", "Testing Set"),
       col = c("blue", "red"), lwd = 2)
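# Optional (illustrative): report the AUC values numerically as well
cat("Training AUC:", auc(roc_train), " Testing AUC:", auc(roc_test), "\n")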