-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathretail-data-analysis.py
More file actions
115 lines (55 loc) · 1.64 KB
/
retail-data-analysis.py
File metadata and controls
115 lines (55 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
# coding: utf-8
# In[1]:
get_ipython().system('pip install kaggle')
# In[19]:
import sys
print(sys.executable)
# In[ ]:
import kaggle
# In[3]:
get_ipython().system('kaggle datasets download -d ankitbansal06/retail-orders')
# In[3]:
import zipfile
zip_ref=zipfile.ZipFile('retail-orders.zip')
zip_ref.extractall()
zip_ref.close()
# In[4]:
import pandas as pd
df=pd.read_csv('orders.csv',na_values=['Not Available', 'unknown'])
df.head(45)
# reading data and handling null values
#df['Ship Mode'].unique()
# In[5]:
# renaming columns
#df.rename(columns={'Order Id':'order_id','City':'city'})
df.columns=df.columns.str.lower()
df.columns=df.columns.str.replace(' ','_')
df.head()
# In[6]:
df['discount_price']=df['list_price']*df['discount_percent']*0.01
df['sale_price']=df['list_price']-df['discount_price']
df['profit']=df['sale_price']-df['cost_price']
df.head(2)
# In[55]:
# df['order_date'].dtypes
df['order_date']=pd.to_datetime(df['order_date'],format="%Y-%m-%d")
df.dtypes
# In[17]:
# so deleting columns which are not useful
df.drop(columns=['cost_price', 'list_price', 'discount_percent'], inplace=True)
df.head(2)
# In[2]:
pip install psycopg2
# In[20]:
import pandas as pd
from sqlalchemy import create_engine
# PostgreSQL connection
from dotenv import load_dotenv
import os
load_dotenv()
engine = create_engine("postgresql+psycopg2://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}")
# Upload DataFrame to PostgreSQL table
df.to_sql('df_orders', con=engine, index=False, if_exists='append') # i wont be using replace as it will set constraint to max
# In[19]:
df.columns