Skip to content

Commit 13000f7

Browse files
committed
more or less done
1 parent ade65da commit 13000f7

19 files changed

+165
-191
lines changed
4.67 KB
Binary file not shown.

__pycache__/models.cpython-39.pyc

3.33 KB
Binary file not shown.

__pycache__/utils.cpython-39.pyc

433 Bytes
Binary file not shown.

client.py

Lines changed: 0 additions & 37 deletions
This file was deleted.

report/create_report.py renamed to create_report.py

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,27 @@
22
from datetime import datetime
33

44

5-
width = 210
6-
height = 297
7-
degree = 2
8-
split = 20
9-
points = 100
10-
training_points = 80
11-
testing_points = 20
125

13-
dataset = "dataste.csv"
146

157

16-
file_name = "report.pdf"
17-
188
pdf = FPDF()
199

20-
def section(string):
10+
def section(string, pdf=pdf):
2111
pdf.set_font('Courier','B', 14)
2212
pdf.cell(0,7,string, ln = 2)
2313

24-
def line(string):
14+
def line(string, pdf= pdf):
2515
pdf.set_font('Courier', '', 11)
2616
pdf.cell(0,5,string, ln = 2)
2717

28-
def multi_line(string):
18+
def multi_line(string, pdf=pdf):
2919
pdf.set_font('Courier', '', 11)
3020
pdf.multi_cell(0,5, string)
3121

32-
def create_report():
22+
def c_report(tot, maxi, mini, mean, medi, sd, training_points, testing_points, degree, split, user_dir):
23+
file_name = "report.pdf"
24+
width = 210
25+
height = 297
3326
pdf.set_margins(15,15)
3427
pdf.set_title('Regression Report')
3528
pdf.set_font('Helvetica', '', 14)
@@ -47,39 +40,39 @@ def create_report():
4740

4841
line("")
4942
section("User Defined Parameters")
50-
line(f"Dataset: {dataset}")
43+
line(f"Dataset: received.txt")
5144
line(f"Polynomial Degree = {degree}")
5245
line(f"Train-test-split = {split}%")
5346
line("")
5447

5548
section("Dataset Exploration")
56-
line(f"Total Points = {points}")
49+
line(f"Total Points = {tot}")
5750
line(f"Training Points = {training_points}")
5851
line(f"Testing Points = {testing_points}")
59-
line(f"Max = {testing_points}")
60-
line(f"Min = {testing_points}")
61-
line(f"Mean = {testing_points}")
62-
line(f"Median = {testing_points}")
63-
line(f"Standard Deviation = {testing_points}")
52+
line(f"Max = {maxi}")
53+
line(f"Min = {mini}")
54+
line(f"Mean = {mean}")
55+
line(f"Median = {medi}")
56+
line(f"Standard Deviation = {sd}")
6457
line("")
6558

6659
section("Results")
6760

6861
section("1. Polynomial Fitting")
69-
pdf.image("./server_data/fitting.png",50,140,w=120, h = 72)
62+
pdf.image(f"{user_dir}/fitting.png",50,140,w=120, h = 72)
7063
pdf.ln(84)
7164
multi_line("The above figure represents the regression line being fitted to the training instances. If the regression line passes through all the points then the polynomial degree chosen overfits the data. Similarly, if the line does not touch even a single point, you might be underfitting the training instances.")
7265

7366
pdf.add_page()
7467
section("2. Actual vs Predicted")
75-
pdf.image("./server_data/actual_pred.png",50,20,w=120, h = 72)
68+
pdf.image(f"{user_dir}/actual_pred.png",50,20,w=120, h = 72)
7669
pdf.ln(84)
7770
multi_line("The above scatter plot represents the spread of predicted and actual scores. Assuming out model perfectly fits the data we can expect the points spread across the y = x line.")
7871
line("")
7972

8073

8174
section("3. Residual vs Predicted")
82-
pdf.image("./server_data/actual_pred.png",50,140,w=120, h = 72)
75+
pdf.image(f"{user_dir}/actual_pred.png",50,140,w=120, h = 72)
8376
pdf.ln(84)
8477
line("Residual = h(x) - f(x)")
8578
line("where,")
@@ -90,13 +83,13 @@ def create_report():
9083

9184
pdf.add_page()
9285
section("4. Mean Squared Error")
93-
pdf.image("./server_data/compare_error.png",50,20,w=120, h = 72)
86+
pdf.image(f"{user_dir}/compare_error.png",50,20,w=120, h = 72)
9487
pdf.ln(80)
9588
multi_line("The mean squared error is calculated as the sum of differences in actual and predicted result squared. The lower the MSE the better the model performs.")
9689
line("")
9790

9891
section("4. Normal Q-Q Plots")
99-
pdf.image("./server_data/normalqq.png",50,125,w=120, h = 72)
92+
pdf.image(f"{user_dir}/normalqq.png",50,125,w=120, h = 72)
10093
pdf.ln(75)
10194
line("")
10295
multi_line("The Q-Q plot, or quantile-quantile plot, is a graphical tool to help us assess if a set of data plausibly came from some theoretical distribution such as a Normal or exponential. For example, if we run a statistical analysis that assumes our dependent variable is Normally distributed, we can use a Normal Q-Q plot to check that assumption. It's just a visual check, not an air-tight proof, so it is somewhat subjective. But it allows us to see at-a-glance if our assumption is plausible, and if not, how the assumption is violated and what data points contribute to the violation.")
@@ -106,7 +99,6 @@ def create_report():
10699
pdf.add_page()
107100
pdf.image("./report/2.png",0,0,width)
108101

109-
pdf.output("./server_data/report.pdf", 'F')
110-
102+
pdf.output(f"{user_dir}/report.pdf", 'F')
103+
return 0
111104

112-
create_report()

models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import statsmodels.api as sm
1515

1616

17-
def reg_training_model(df, degree = 1, split_ratio = 0.2, email ="dummy"):
17+
def reg_training_model(df, user_dir, degree = 1, split_ratio = 0.2):
1818
'''
1919
function: poly_reg (Gives the polynomial regression for a set of data)
2020
Using PolymnomialFeatures and LinearRegression functions, fits a polynomial of degree n.
@@ -67,7 +67,7 @@ def reg_training_model(df, degree = 1, split_ratio = 0.2, email ="dummy"):
6767
plt.plot(df.x_test, df.y_test_pred, label = "Polynomial degree = {}".format(degree), color='r')
6868
plt.legend(loc='upper left')
6969
plt.title("Polynomial Fitting")
70-
plt.savefig("server_data/fitting.png")
70+
plt.savefig(f"{user_dir}/fitting.png")
7171
plt.show()
7272

7373

@@ -90,7 +90,7 @@ def reg_training_model(df, degree = 1, split_ratio = 0.2, email ="dummy"):
9090
plt.ylabel("Predicted Results")
9191
plt.legend(loc='upper left')
9292
plt.title("Actual VS Predicted")
93-
plt.savefig("server_data/actual_pred.png")
93+
plt.savefig(f"{user_dir}/actual_pred.png")
9494

9595

9696

@@ -102,7 +102,7 @@ def reg_training_model(df, degree = 1, split_ratio = 0.2, email ="dummy"):
102102
plt.ylabel("Dataset")
103103
plt.ylabel("Mean Square Error")
104104
plt.title("Comparing Errors")
105-
plt.savefig("server_data/compare_error.png")
105+
plt.savefig(f"{user_dir}/compare_error.png")
106106

107107

108108

@@ -119,7 +119,7 @@ def reg_training_model(df, degree = 1, split_ratio = 0.2, email ="dummy"):
119119
plt.ylabel("Residual")
120120
plt.legend(loc='upper left')
121121
plt.title("Residual VS Predicted")
122-
plt.savefig("server_data/residual_pred.png")
122+
plt.savefig(f"{user_dir}/residual_pred.png")
123123

124124

125125

@@ -135,6 +135,6 @@ def reg_training_model(df, degree = 1, split_ratio = 0.2, email ="dummy"):
135135
sm.qqplot(residual_test, line='45', ax = ax2)
136136
ax2.set_title("Test")
137137
ax2.set_ylabel("Standard Residual")
138-
plt.savefig("server_data/residual_pred.png")
138+
plt.savefig(f"{user_dir}/residual_pred.png")
139139
return 0
140140

receiver.py

Lines changed: 0 additions & 43 deletions
This file was deleted.
4.61 KB
Binary file not shown.
4.77 KB
Binary file not shown.

sender.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

server.py

Lines changed: 0 additions & 32 deletions
This file was deleted.

server1.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,38 @@
44
import time
55
from models import *
66
import pandas as pd
7+
from utils import *
8+
from create_report import *
79

810
PORT = 2223
911
SERVER = socket.gethostname()
1012
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1113
server.bind((SERVER, PORT))
14+
HEADER = 2048
15+
16+
server_dir = "./server_data/"
1217

1318
while True:
1419
server.listen(5)
1520
print(f"[LISTENING] Server is listening on {SERVER}")
1621
conn, addr = server.accept()
1722
print("Connection established with " + str(addr[0]) + ", " + str(addr[1]))
1823

19-
file_name = 'received.csv'
24+
# email = server.recv(HEADER).decode()
25+
# initial = email.split('@')[0]
26+
# password = server.recv(HEADER).decode()
27+
28+
initial = "jen"
29+
30+
user_dir = os.path.join(server_dir, initial)
31+
if os.path.isdir(user_dir):
32+
pass
33+
else:
34+
os.mkdir(user_dir)
35+
36+
37+
38+
file_name = f'./server_data/{initial}/received.csv'
2039
file_size = conn.recv(100).decode()
2140
print(file_size)
2241
# Opening and reading file.
@@ -50,8 +69,12 @@
5069

5170

5271
df = pd.read_csv(file_name)
53-
reg_training_model(df, degree = int(degree), split_ratio = int(training_ratio)/100 )
54-
72+
training_points = int(len(df)*int(training_ratio)/100)
73+
testing_points = len(df) - training_points
74+
reg_training_model(df, degree = int(degree), split_ratio = int(training_ratio)/100, user_dir = user_dir)
75+
76+
tot, maxi, mini, mean, medi, sd = stats_summary(df)
77+
c_report(tot, maxi, mini, mean, medi, sd, training_points, testing_points, degree, split = training_ratio, user_dir = user_dir)
5578

5679
# Closing the socket.
5780
conn.close()

server_data/jen/actual_pred.png

34.7 KB
Loading

server_data/jen/compare_error.png

14.1 KB
Loading

server_data/jen/fitting.png

26.2 KB
Loading

0 commit comments

Comments
 (0)