Skip to content

Commit 4381137

Browse files
committed
DataPylot Full Release
1 parent f5224af commit 4381137

83 files changed

Lines changed: 3242 additions & 1923 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

DataPylot/CodeGenerators/Cleaning/gen_exe_miss.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
class MissingValueExecutor:
44
@staticmethod
5-
def generate(df, feature=None, operation=None, new_name=None, session=None, withImport=True):
5+
def generate(df, feature=None, operation=None, new_name=None, new_col=None, session=None, withImport=True):
66
import_stmts = ["import pandas as pd"]
77
code = ""
88

@@ -11,10 +11,10 @@ def generate(df, feature=None, operation=None, new_name=None, session=None, with
1111
if not isinstance(data, pd.DataFrame):
1212
return code, import_stmts
1313

14-
if new_name is None:
15-
new_name = df
16-
1714
if feature is None:
15+
if new_name is None:
16+
new_name = df
17+
1818
code += (
1919
f"# Remove rows with missing values from the entire dataset\n"
2020
f"{new_name} = {df}.dropna()"
@@ -25,12 +25,18 @@ def generate(df, feature=None, operation=None, new_name=None, session=None, with
2525
return code, import_stmts
2626
match operation:
2727
case "remove":
28+
if new_name is None:
29+
new_name = df
30+
2831
code += (
2932
f"# Remove rows with missing values in feature '{feature}'\n"
3033
f"{new_name} = {df}.dropna(subset=['{feature}'])"
3134
)
3235
result = data.dropna(subset=[feature])
3336
case "mode":
37+
if new_name is None:
38+
new_name = df
39+
3440
code += (
3541
f"# Impute missing values in feature '{feature}'\n"
3642
f"mode_value = {df}['{feature}'].mode()[0]\n"
@@ -39,6 +45,9 @@ def generate(df, feature=None, operation=None, new_name=None, session=None, with
3945
mode_value = data[feature].mode()[0]
4046
result = data.fillna({feature: mode_value})
4147
case "median":
48+
if new_name is None:
49+
new_name = df
50+
4251
code += (
4352
f"# Impute missing values in feature '{feature}' with median\n"
4453
f"median_value = {df}['{feature}'].median()\n"
@@ -47,14 +56,34 @@ def generate(df, feature=None, operation=None, new_name=None, session=None, with
4756
median_value = data[feature].median()
4857
result = data.fillna({feature: median_value})
4958
case "mean":
59+
if new_name is None:
60+
new_name = df
61+
5062
code += (
5163
f"# Impute missing values in feature '{feature}' with mean\n"
5264
f"mean_value = {df}['{feature}'].mean()\n"
5365
f"{new_name} = {df}.fillna({{'{feature}': mean_value}})"
5466
)
5567
mean_value = data[feature].mean()
5668
result = data.fillna({feature: mean_value})
69+
case "dummy":
70+
code += (
71+
f"# Replace feature '{feature}' to an indicator of: 1 = Value Present, 0 = Value Missing\n"
72+
)
73+
if new_name is None:
74+
new_name = df
75+
else:
76+
code += f"{new_name} = {df}.copy()"
77+
78+
if new_col is None:
79+
new_col = feature
5780

81+
code += (
82+
f"{new_name}['{new_col}'] = {df}['{feature}'].notnull().astype(int)"
83+
)
84+
result = data
85+
result[new_col] = data[feature].notnull().astype(int)
86+
5887
session.addDataFrame(new_name, result)
5988

6089
if withImport:

DataPylot/CodeGenerators/Cleaning/gen_exe_outlier.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from numpy._core.defchararray import upper
21
import pandas as pd
32

43
class OutlierExecutor:
Lines changed: 51 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,74 @@
11
class BivariatePlotGenerator:
    """Generate matplotlib/seaborn source code for two-variable plots.

    The generator never draws anything itself: it only assembles a string of
    Python statements that reference ``plt`` and ``sns``.
    """

    # Plot types built from one axis column plus a grouping column:
    # plot name -> seaborn function name.
    _HUE_PLOTS = {
        "Grouped Count Plot": "countplot",
        "Grouped Box Plot": "boxplot",
    }

    # Plot types built from an x/y column pair:
    # plot name -> (seaborn function name, optional keyword fragments, in order).
    _XY_PLOTS = {
        "Violin Plot": ("violinplot", ("color",)),
        "Swarm Plot": ("swarmplot", ("color", "size", "marker")),
        "Scatter Plot": ("scatterplot", ("color", "size", "marker")),
        "Line Plot": ("lineplot", ("color", "marker")),
    }

    @staticmethod
    def _quote(value):
        """Return ``value`` rendered as a valid Python string literal.

        ``repr`` picks the quote character and escapes quotes, backslashes and
        control characters, so text such as ``Tom's plot`` cannot break the
        generated statement.  For ordinary text the result is the same
        single-quoted literal that plain interpolation would have produced.
        """
        return repr(str(value))

    @staticmethod
    def generate(df, plot_type, params, withImport=False):
        """Build the plotting code for ``plot_type``.

        Args:
            df: Name of the DataFrame variable, inserted verbatim into the
                generated code as ``data=<df>``.
            plot_type: One of "Grouped Count Plot", "Grouped Box Plot",
                "Violin Plot", "Swarm Plot", "Scatter Plot" or "Line Plot".
                Any other value produces code that contains only the figure
                setup, the optional decorations and ``plt.show()``.
            params: Mapping of option names to values.  "X Axis" is required
                for every known plot type, "Hue" for the grouped plots and
                "Y Axis" for the others.  Optional keys: "Plot Style",
                "Grid Lines", "Color Palette", "Color", "Marker Style",
                "Point Size", "Label Rotation", "Plot Title", "X Axis Label",
                "Y Axis Label".
            withImport: When true, the import statements are prepended to the
                returned code, separated from it by a blank line.

        Returns:
            A ``(code, import_stmts)`` tuple: the generated source as one
            string (no trailing newline) and the list of import statements
            that source relies on.

        Raises:
            KeyError: If a key required by a known ``plot_type`` is missing
                from ``params``.
        """
        quote = BivariatePlotGenerator._quote

        import_stmts = [
            "import matplotlib.pyplot as plt",
            "import seaborn as sns",
        ]

        lines = ["plt.figure(figsize=(8, 6))"]

        style = params.get("Plot Style")
        if style:
            lines.append(f"sns.set_style({quote(style)})")

        if params.get("Grid Lines"):
            lines.append("plt.grid(True)")

        title = params.get("Plot Title")
        if title:
            lines.append(f"plt.title({quote(title)})")

        # An explicit label wins; otherwise fall back to the column name.
        xlabel = params.get("X Axis Label") or params.get("X Axis")
        if xlabel:
            lines.append(f"plt.xlabel({quote(xlabel)})")

        ylabel = params.get("Y Axis Label") or params.get("Y Axis")
        if ylabel:
            lines.append(f"plt.ylabel({quote(ylabel)})")

        # Optional keyword fragments; each is empty when its option is unset.
        point_size = params.get("Point Size")
        optional = {
            "palette": (
                f", palette={quote(params['Color Palette'])}"
                if params.get("Color Palette") else ""
            ),
            "color": (
                f", color={quote(params['Color'])}"
                if params.get("Color") else ""
            ),
            "marker": (
                f", marker={quote(params['Marker Style'])}"
                if params.get("Marker Style") else ""
            ),
            # The size is tested against None so that 0 is still emitted.
            "size": f", s={point_size}" if point_size is not None else "",
        }

        hue_plots = BivariatePlotGenerator._HUE_PLOTS
        xy_plots = BivariatePlotGenerator._XY_PLOTS

        if plot_type in hue_plots:
            func = hue_plots[plot_type]
            lines.append(
                f"sns.{func}(data={df}, x={quote(params['X Axis'])}, "
                f"hue={quote(params['Hue'])}{optional['palette']})"
            )
        elif plot_type in xy_plots:
            func, extras = xy_plots[plot_type]
            extra_args = "".join(optional[name] for name in extras)
            lines.append(
                f"sns.{func}(data={df}, x={quote(params['X Axis'])}, "
                f"y={quote(params['Y Axis'])}{extra_args})"
            )

        rotation = params.get("Label Rotation", 0)
        if rotation:
            lines.append(f"plt.xticks(rotation={rotation})")

        lines.append("plt.show()")
        code = "\n".join(lines)

        if withImport:
            code = "\n".join(import_stmts) + "\n\n" + code

        return code, import_stmts

DataPylot/CodeGenerators/EDA/gen_multi.py

Lines changed: 59 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,88 @@
11
class MultivariatePlotGenerator:
22
@staticmethod
3-
def generate(df, plot, x, y, hue, title, rotation, color, withImport):
3+
def generate(df, plot_type, params, withImport=False):
44
import_stmts = [
55
"import matplotlib.pyplot as plt",
66
"import seaborn as sns"
77
]
8-
code = ""
8+
code = "plt.figure(figsize=(8, 6))\n"
99

10-
title_stmt = f"plt.title(\"{title}\")\n" if title else ""
11-
rot_stmt = f"plt.xticks(rotation={rotation})\n" if rotation != 0 else ""
12-
color_stmt = f", palette='{color}'" if color else ""
10+
if params.get("Style"):
11+
code += f"sns.set_style('{params['Style']}')\n"
1312

14-
match plot:
15-
case "Heatmap":
16-
color_stmt = f", cmap='{color}'" if color else ""
13+
if params.get("Show Legend") is False:
14+
legend_stmt = "plt.legend().remove()\n"
15+
else:
16+
legend_stmt = ""
17+
18+
if params.get("Grid Lines"):
19+
code += "plt.grid(True)\n"
20+
21+
palette = f", palette='{params['Palette']}'" if params.get("Palette") else ""
22+
color = f", color='{params['Color']}'" if params.get("Color") else ""
23+
cmap = f", cmap='{params['Color Map']}'" if params.get("Color Map") else ""
24+
marker = f", marker='{params['Marker Style']}'" if params.get("Marker Style") else ""
25+
linestyle = f", linestyle='{params['Line Style']}'" if params.get("Line Style") else ""
26+
alpha = f", alpha={params['Alpha (Transparency)']}" if params.get("Alpha (Transparency)") is not None else ""
27+
28+
rotation = params.get("Label Rotation", 0)
29+
rot_stmt = f"plt.xticks(rotation={rotation})\n" if rotation else ""
30+
31+
title = params.get("Plot Title")
32+
if title:
33+
code += f"plt.title('{title}')\n"
34+
35+
xlabel = params.get("X Axis Label") or params.get("X Axis")
36+
if xlabel:
37+
code += f"plt.xlabel('{xlabel}')\n"
38+
39+
ylabel = params.get("Y Axis Label") or params.get("Y Axis")
40+
if ylabel:
41+
code += f"plt.ylabel('{ylabel}')\n"
42+
43+
44+
match plot_type:
45+
case "Grouped Violin Plot":
1746
code += (
18-
f"correlation_matrix = {df}[{df}.select_dtypes(include='number').columns].corr()\n"
19-
f"plt.figure(figsize=(10, 8))\n"
20-
f"sns.heatmap(correlation_matrix, annot=True{color_stmt}, fmt='.2f', cbar=True)\n"
21-
f"{title_stmt}"
22-
f"plt.show()\n"
47+
f"sns.violinplot(data={df}, x='{params['X Axis']}', y='{params['Y Axis']}', hue='{params['Hue']}'{palette}, split=True)\n"
48+
f"{rot_stmt}{legend_stmt}"
2349
)
2450

25-
case "Pair Plot":
26-
hue_stmt = f", hue='{hue}'{color_stmt}" if hue else ""
51+
case "Grouped Swarm Plot":
2752
code += (
28-
f"plt.figure(figsize=(10, 8))\n"
29-
f"sns.pairplot({df}.select_dtypes(include='number'), diag_kind='kde'{hue_stmt})\n"
30-
f"{title_stmt}"
31-
f"plt.show()\n"
53+
f"sns.swarmplot(data={df}, x='{params['X Axis']}', y='{params['Y Axis']}', hue='{params['Hue']}'{palette}{alpha})\n"
54+
f"{rot_stmt}{legend_stmt}"
3255
)
3356

34-
case "Violin Plot":
35-
hue_stmt = f", hue='{hue}'{color_stmt}" if hue else ""
57+
case "Grouped Scatter Plot":
3658
code += (
37-
f"plt.figure(figsize=(8, 6))\n"
38-
f"sns.violinplot(data={df}, x='{x}', y='{y}'{hue_stmt}, split=True)\n"
39-
f"{title_stmt}"
40-
f"{rot_stmt}"
41-
f"plt.show()\n"
59+
f"sns.scatterplot(data={df}, x='{params['X Axis']}', y='{params['Y Axis']}', hue='{params['Hue']}'{palette}{marker}{alpha})\n"
60+
f"{rot_stmt}{legend_stmt}"
4261
)
4362

44-
case "Swarm Plot":
45-
hue_stmt = f", hue='{hue}'{color_stmt}" if hue else ""
63+
case "Grouped Line Plot":
4664
code += (
47-
f"plt.figure(figsize=(8, 6))\n"
48-
f"sns.swarmplot(data={df}, x='{x}', y='{y}'{hue_stmt})\n"
49-
f"{title_stmt}"
50-
f"{rot_stmt}"
51-
f"plt.show()\n"
65+
f"sns.lineplot(data={df}, x='{params['X Axis']}', y='{params['Y Axis']}', hue='{params['Hue']}'{marker}{linestyle}{alpha})\n"
66+
f"{rot_stmt}{legend_stmt}"
5267
)
5368

54-
case "Scatter Plot":
55-
hue_stmt = f", hue='{hue}'{color_stmt}" if hue else ""
69+
case "Heatmap":
70+
method = params.get("Correlation Method", "pearson")
71+
annot = f", annot=True" if params.get("Annotate Values", True) else ""
5672
code += (
57-
f"plt.figure(figsize=(8, 6))\n"
58-
f"sns.scatterplot(data={df}, x='{x}', y='{y}'{hue_stmt})\n"
59-
f"{title_stmt}"
60-
f"{rot_stmt}"
61-
f"plt.show()\n"
73+
f"correlation_matrix = {df}[{df}.select_dtypes(include='number').columns].corr(method='{method}')\n"
74+
f"sns.heatmap(correlation_matrix{annot}{cmap}, fmt='.2f', cbar=True)\n"
6275
)
6376

64-
case "Line Plot":
65-
hue_stmt = f", hue='{hue}'{color_stmt}" if hue else ""
77+
case "Pair Plot":
78+
hue_stmt = f", hue='{params['Hue']}'" if params.get("Hue") else ""
79+
kind_stmt = f", kind='{params['Kind']}'" if params.get("Kind") else ""
6680
code += (
67-
f"plt.figure(figsize=(8, 6))\n"
68-
f"sns.lineplot(data={df}, x='{x}', y='{y}'{hue_stmt})\n"
69-
f"{title_stmt}"
70-
f"{rot_stmt}"
71-
f"plt.show()\n"
81+
f"sns.pairplot({df}.select_dtypes(include='number'){hue_stmt}{palette}{kind_stmt})\n"
7282
)
7383

84+
code += "plt.show()"
85+
7486
if withImport:
7587
imports_code = "\n".join(import_stmts)
7688
code = f"{imports_code}\n\n{code}"

0 commit comments

Comments
 (0)