-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistribution_plot.py
27 lines (26 loc) · 1.28 KB
/
distribution_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def dist_box(data):
    """Plot a box plot above a histogram for a single numeric series.

    Builds a two-row figure with a shared x-axis: a horizontal box plot
    (mean marker shown) on top and a histogram underneath. Vertical lines
    on the histogram mark the mean (red, dashed), the median (green) and
    the first mode (yellow), and a legend names them.

    The names ``plt`` and ``sns`` (conventionally ``matplotlib.pyplot``
    and ``seaborn``) must already be bound in the enclosing namespace;
    this function does not import them.

    Args:
        data: A pandas-Series-like object providing ``name``, ``mean()``,
            ``median()`` and ``mode()``.

    Returns:
        None. The figure is left open on the pyplot state machine.
    """
    # Apply the theme and palette BEFORE creating the figure so the very
    # first plot is styled as well; setting them afterwards only affects
    # figures created later.
    sns.set_theme(style="white")
    sns.set_palette(sns.color_palette("Set1", 8))

    # ``name`` may be None or a non-string label, so coerce before upper().
    title = str(data.name).upper()

    mean = data.mean()
    median = data.median()
    # mode() can be empty (e.g. empty / all-NaN input); guard the lookup.
    modes = data.mode().tolist()
    mode = modes[0] if modes else None

    fig, (ax_box, ax_dis) = plt.subplots(
        nrows=2,
        sharex=True,
        gridspec_kw={"height_ratios": (.25, .75)},
        figsize=(8, 5),
    )
    fig.suptitle("SPREAD OF DATA FOR " + title, fontsize=18, fontweight='bold')

    sns.boxplot(x=data, showmeans=True, orient='h', ax=ax_box)
    ax_box.set(xlabel='')
    # Remove the top, right and left spines from the current figure's axes.
    sns.despine(top=True, right=True, left=True)

    # histplot replaces the deprecated distplot(kde=False).
    sns.histplot(x=data, kde=False, ax=ax_dis)

    # Label each marker line explicitly so the legend entries are tied to
    # the lines themselves rather than matched to artists by draw order.
    ax_dis.axvline(mean, color='r', linestyle='--', linewidth=2, label='Mean')
    ax_dis.axvline(median, color='g', linestyle='-', linewidth=2, label='Median')
    if mode is not None:
        ax_dis.axvline(mode, color='y', linestyle='-', linewidth=2, label='Mode')
    ax_dis.legend()
# Draw the box-plot/histogram figure for every numeric column.
# NOTE(review): ``data`` is not defined in this file; it is presumably a
# pandas DataFrame created before this point -- confirm against the caller.
list_col = data.select_dtypes(include='number').columns.to_list()
for col_name in list_col:
    dist_box(data[col_name])