1
+ from os import path
2
+ import statistics as st
3
+ from urllib .request import urlretrieve
4
+
5
+
6
# Local path where the line-count data file is cached.
STATS = path.join('/tmp', 'testfiles_number_loc.txt')
# Download the data file at import time, but only when no cached copy exists.
if not path.isfile(STATS):
    urlretrieve('https://bit.ly/2Jp5CUt', STATS)

# Report template; create_stats_report() fills the named fields with
# str.format(**stats).
# NOTE(review): whitespace inside the template is emitted verbatim -- confirm
# the column alignment against the expected report output.
STATS_OUTPUT = """
Basic statistics:
- count : {count:7d}
- min : {min_:7d}
- max : {max_:7d}
- mean : {mean:7.2f}

Population variance:
- pstdev : {pstdev:7.2f}
- pvariance : {pvariance:7.2f}

Estimated variance for sample:
- count : {sample_count:7.2f}
- stdev : {sample_stdev:7.2f}
- variance : {sample_variance:7.2f}
"""
26
+
27
def get_all_line_counts(data: str = STATS) -> list:
    """Return the integer line counts found in a stats file.

    The file at *data* is read in full and split on whitespace. Tokens
    longer than five characters are skipped; every remaining token is
    converted with ``int()``.

    Args:
        data: Path of the file to read. Defaults to ``STATS``.

    Returns:
        A list of ints in file order (the downloaded file is expected to
        yield 186 values).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token of five characters or fewer is not an integer.
    """
    with open(data, 'r') as file:
        # split() with no separator never yields empty strings, so no
        # explicit end-of-input check is needed.
        tokens = file.read().split()

    # NOTE(review): the length cut-off presumably filters out the file-path
    # column; it would also drop counts of six or more digits -- confirm
    # against the data file's format.
    return [int(token) for token in tokens if len(token) <= 5]
46
+
47
+
48
def create_stats_report(data=None):
    """Build the formatted statistics report for a collection of numbers.

    Args:
        data: Iterable of numbers. When ``None``, the values are loaded with
            ``get_all_line_counts()``. Any iterable, including a one-shot
            generator, is accepted; it is materialised into a list once.

    Returns:
        ``STATS_OUTPUT`` filled in with the count, min, max and mean of
        *data*, its population standard deviation and variance, and the
        count, sample standard deviation and sample variance of every
        second element of *data*.

    Raises:
        ValueError: If *data* is empty (raised by ``min()``).
        statistics.StatisticsError: If the sample has fewer than two values,
            i.e. *data* has fewer than three.
    """
    if data is None:
        data = get_all_line_counts()
    # Materialise once so a one-shot iterable can be measured with len()
    # and sliced for the sample without being exhausted in between.
    data = list(data)

    # Every second value, starting with the first, forms the sample used
    # for the last section of the report.
    sample = data[::2]

    stats = dict(
        count=len(data),
        min_=min(data),
        max_=max(data),
        mean=st.mean(data),
        pstdev=st.pstdev(data),
        pvariance=st.pvariance(data),
        sample_count=len(sample),
        sample_stdev=st.stdev(sample),
        sample_variance=st.variance(sample),
    )
    return STATS_OUTPUT.format(**stats)
83
+
84
+ # print(get_all_line_counts())
85
+ # print(get_all_line_counts().__len__())
86
+ # create_stats_report()
0 commit comments