-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpredict.py
66 lines (45 loc) · 1.87 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import sys
import pandas as pd
import pickle as pkl
import time
# Directory (relative path) that holds the CSV file to be scored.
INPUT_PATH = "input"
# Name of the CSV inside INPUT_PATH, taken from the first command-line argument.
# This is evaluated at import time, so loading the module without an argument
# raises IndexError on this line.
FILE_NAME = sys.argv[1]
# Directory (relative path) where main() writes the timestamped prediction CSV.
OUTPUT_PATH = "output"
# Directory (relative path) from which main() loads the pickled encoder and
# model ("onehotenc.pkl" and "svm_minmax.pkl").
MODELS_PATH = "models"
def _load_pickle(filename):
    """Unpickle and return the artifact stored as *filename* under MODELS_PATH."""
    # NOTE(security): unpickling can execute arbitrary code. Only load
    # artifacts that were produced by a trusted training run.
    with open(os.path.join(MODELS_PATH, filename), "rb") as f:
        return pkl.load(f)


def main():
    """Score the input CSV with the persisted model and save the predictions.

    Reads ``INPUT_PATH/FILE_NAME``, builds the feature matrix (binary flags
    followed by one-hot encoded columns), predicts with the pickled model,
    appends the prediction to the original rows as a ``Churn`` column holding
    "Yes"/"No", writes the result to a timestamped CSV under ``OUTPUT_PATH``
    and prints the path of that file.

    Raises:
        FileNotFoundError: if the input CSV or a pickled artifact is missing.
        KeyError: if the input CSV lacks one of the expected columns.
    """
    df = pd.read_csv(os.path.join(INPUT_PATH, FILE_NAME))

    # Transformation
    # Remove features that are not fed to the model. drop() returns a new
    # frame, so the raw rows in `df` stay intact for the final output.
    col_remove = ["gender", "PhoneService", "MultipleLines", "customerID"]
    features = df.drop(columns=col_remove)

    # Convert text to binary: "Yes" -> 1, anything else (including missing
    # values) -> 0.
    # NOTE(review): if SeniorCitizen already arrives encoded as 0/1 this maps
    # every row to 0 -- confirm against the training script before changing.
    col_text2binary = ["SeniorCitizen", "Partner", "Dependents",
                       "PaperlessBilling"]
    for col in col_text2binary:
        features[col] = features[col].eq("Yes").astype(int)

    # One hot encoding
    enc = _load_pickle("onehotenc.pkl")
    col_text2onehot = ["InternetService", "OnlineSecurity", "OnlineBackup",
                       "DeviceProtection", "TechSupport", "StreamingTV",
                       "StreamingMovies", "Contract", "PaymentMethod"]
    # Use transform(), not fit_transform(): re-fitting here would rebuild the
    # categories from the inference file alone, so the one-hot columns could
    # differ in number or order from the ones the model was trained on.
    # (Assumes the pickled encoder was saved after fitting.)
    onehot_output = enc.transform(features[col_text2onehot]).toarray()
    onehot_df = pd.DataFrame(
        data=onehot_output,
        columns=enc.get_feature_names_out(col_text2onehot),
    )

    # Combine binary with one hot
    X_test = pd.concat([features[col_text2binary], onehot_df], axis=1)

    # Perform inference using the persisted Sklearn Pipeline
    model = _load_pickle("svm_minmax.pkl")
    y_pred = model.predict(X_test.values)

    # Translate the numeric prediction into the "Yes"/"No" label and append it
    # to the untouched input rows.
    df_test = pd.DataFrame(
        {"Churn": ["Yes" if label == 1 else "No" for label in y_pred]}
    )
    result = pd.concat([df, df_test], axis=1)

    # Make sure the destination exists so a fresh checkout does not fail at
    # the very last step.
    os.makedirs(OUTPUT_PATH, exist_ok=True)
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S")  # local time
    saved_filepath = os.path.join(OUTPUT_PATH, f"output_{current_time}.csv")
    result.to_csv(saved_filepath, index=False)
    print(saved_filepath)
# Run the prediction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()