Skip to content

Commit add094a

Browse files
authored
Add files via upload
1 parent bd3a84a commit add094a

File tree

2 files changed

+530
-0
lines changed

2 files changed

+530
-0
lines changed
+385
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 6,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# import numpy for numerical array handling \n",
10+
"import numpy as np \n",
11+
"\n",
12+
"# import matplotlib.pyplot for plotting our result \n",
13+
"import matplotlib.pyplot as plt \n",
14+
"\n",
15+
"# import pandas for importing csv files \n",
16+
"import pandas as pd \n",
17+
"\n",
18+
"# import the regressor \n",
19+
"from sklearn.tree import DecisionTreeRegressor "
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": 7,
25+
"metadata": {},
26+
"outputs": [],
27+
"source": [
28+
"# load Company_Data.csv into a DataFrame (the path below is specific to the author's machine) \n",
29+
"dataset = pd.read_csv('~/Downloads/Data Science/data set/Company_Data.csv') "
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": 8,
35+
"metadata": {},
36+
"outputs": [
37+
{
38+
"data": {
39+
"text/html": [
40+
"<div>\n",
41+
"<style scoped>\n",
42+
" .dataframe tbody tr th:only-of-type {\n",
43+
" vertical-align: middle;\n",
44+
" }\n",
45+
"\n",
46+
" .dataframe tbody tr th {\n",
47+
" vertical-align: top;\n",
48+
" }\n",
49+
"\n",
50+
" .dataframe thead th {\n",
51+
" text-align: right;\n",
52+
" }\n",
53+
"</style>\n",
54+
"<table border=\"1\" class=\"dataframe\">\n",
55+
" <thead>\n",
56+
" <tr style=\"text-align: right;\">\n",
57+
" <th></th>\n",
58+
" <th>Sales</th>\n",
59+
" <th>CompPrice</th>\n",
60+
" <th>Income</th>\n",
61+
" <th>Advertising</th>\n",
62+
" <th>Population</th>\n",
63+
" <th>Price</th>\n",
64+
" <th>ShelveLoc</th>\n",
65+
" <th>Age</th>\n",
66+
" <th>Education</th>\n",
67+
" <th>Urban</th>\n",
68+
" <th>US</th>\n",
69+
" </tr>\n",
70+
" </thead>\n",
71+
" <tbody>\n",
72+
" <tr>\n",
73+
" <th>0</th>\n",
74+
" <td>9.50</td>\n",
75+
" <td>138</td>\n",
76+
" <td>73</td>\n",
77+
" <td>11</td>\n",
78+
" <td>276</td>\n",
79+
" <td>120</td>\n",
80+
" <td>Bad</td>\n",
81+
" <td>42</td>\n",
82+
" <td>17</td>\n",
83+
" <td>Yes</td>\n",
84+
" <td>Yes</td>\n",
85+
" </tr>\n",
86+
" <tr>\n",
87+
" <th>1</th>\n",
88+
" <td>11.22</td>\n",
89+
" <td>111</td>\n",
90+
" <td>48</td>\n",
91+
" <td>16</td>\n",
92+
" <td>260</td>\n",
93+
" <td>83</td>\n",
94+
" <td>Good</td>\n",
95+
" <td>65</td>\n",
96+
" <td>10</td>\n",
97+
" <td>Yes</td>\n",
98+
" <td>Yes</td>\n",
99+
" </tr>\n",
100+
" <tr>\n",
101+
" <th>2</th>\n",
102+
" <td>10.06</td>\n",
103+
" <td>113</td>\n",
104+
" <td>35</td>\n",
105+
" <td>10</td>\n",
106+
" <td>269</td>\n",
107+
" <td>80</td>\n",
108+
" <td>Medium</td>\n",
109+
" <td>59</td>\n",
110+
" <td>12</td>\n",
111+
" <td>Yes</td>\n",
112+
" <td>Yes</td>\n",
113+
" </tr>\n",
114+
" <tr>\n",
115+
" <th>3</th>\n",
116+
" <td>7.40</td>\n",
117+
" <td>117</td>\n",
118+
" <td>100</td>\n",
119+
" <td>4</td>\n",
120+
" <td>466</td>\n",
121+
" <td>97</td>\n",
122+
" <td>Medium</td>\n",
123+
" <td>55</td>\n",
124+
" <td>14</td>\n",
125+
" <td>Yes</td>\n",
126+
" <td>Yes</td>\n",
127+
" </tr>\n",
128+
" <tr>\n",
129+
" <th>4</th>\n",
130+
" <td>4.15</td>\n",
131+
" <td>141</td>\n",
132+
" <td>64</td>\n",
133+
" <td>3</td>\n",
134+
" <td>340</td>\n",
135+
" <td>128</td>\n",
136+
" <td>Bad</td>\n",
137+
" <td>38</td>\n",
138+
" <td>13</td>\n",
139+
" <td>Yes</td>\n",
140+
" <td>No</td>\n",
141+
" </tr>\n",
142+
" <tr>\n",
143+
" <th>...</th>\n",
144+
" <td>...</td>\n",
145+
" <td>...</td>\n",
146+
" <td>...</td>\n",
147+
" <td>...</td>\n",
148+
" <td>...</td>\n",
149+
" <td>...</td>\n",
150+
" <td>...</td>\n",
151+
" <td>...</td>\n",
152+
" <td>...</td>\n",
153+
" <td>...</td>\n",
154+
" <td>...</td>\n",
155+
" </tr>\n",
156+
" <tr>\n",
157+
" <th>395</th>\n",
158+
" <td>12.57</td>\n",
159+
" <td>138</td>\n",
160+
" <td>108</td>\n",
161+
" <td>17</td>\n",
162+
" <td>203</td>\n",
163+
" <td>128</td>\n",
164+
" <td>Good</td>\n",
165+
" <td>33</td>\n",
166+
" <td>14</td>\n",
167+
" <td>Yes</td>\n",
168+
" <td>Yes</td>\n",
169+
" </tr>\n",
170+
" <tr>\n",
171+
" <th>396</th>\n",
172+
" <td>6.14</td>\n",
173+
" <td>139</td>\n",
174+
" <td>23</td>\n",
175+
" <td>3</td>\n",
176+
" <td>37</td>\n",
177+
" <td>120</td>\n",
178+
" <td>Medium</td>\n",
179+
" <td>55</td>\n",
180+
" <td>11</td>\n",
181+
" <td>No</td>\n",
182+
" <td>Yes</td>\n",
183+
" </tr>\n",
184+
" <tr>\n",
185+
" <th>397</th>\n",
186+
" <td>7.41</td>\n",
187+
" <td>162</td>\n",
188+
" <td>26</td>\n",
189+
" <td>12</td>\n",
190+
" <td>368</td>\n",
191+
" <td>159</td>\n",
192+
" <td>Medium</td>\n",
193+
" <td>40</td>\n",
194+
" <td>18</td>\n",
195+
" <td>Yes</td>\n",
196+
" <td>Yes</td>\n",
197+
" </tr>\n",
198+
" <tr>\n",
199+
" <th>398</th>\n",
200+
" <td>5.94</td>\n",
201+
" <td>100</td>\n",
202+
" <td>79</td>\n",
203+
" <td>7</td>\n",
204+
" <td>284</td>\n",
205+
" <td>95</td>\n",
206+
" <td>Bad</td>\n",
207+
" <td>50</td>\n",
208+
" <td>12</td>\n",
209+
" <td>Yes</td>\n",
210+
" <td>Yes</td>\n",
211+
" </tr>\n",
212+
" <tr>\n",
213+
" <th>399</th>\n",
214+
" <td>9.71</td>\n",
215+
" <td>134</td>\n",
216+
" <td>37</td>\n",
217+
" <td>0</td>\n",
218+
" <td>27</td>\n",
219+
" <td>120</td>\n",
220+
" <td>Good</td>\n",
221+
" <td>49</td>\n",
222+
" <td>16</td>\n",
223+
" <td>Yes</td>\n",
224+
" <td>Yes</td>\n",
225+
" </tr>\n",
226+
" </tbody>\n",
227+
"</table>\n",
228+
"<p>400 rows × 11 columns</p>\n",
229+
"</div>"
230+
],
231+
"text/plain": [
232+
" Sales CompPrice Income Advertising Population Price ShelveLoc Age \\\n",
233+
"0 9.50 138 73 11 276 120 Bad 42 \n",
234+
"1 11.22 111 48 16 260 83 Good 65 \n",
235+
"2 10.06 113 35 10 269 80 Medium 59 \n",
236+
"3 7.40 117 100 4 466 97 Medium 55 \n",
237+
"4 4.15 141 64 3 340 128 Bad 38 \n",
238+
".. ... ... ... ... ... ... ... ... \n",
239+
"395 12.57 138 108 17 203 128 Good 33 \n",
240+
"396 6.14 139 23 3 37 120 Medium 55 \n",
241+
"397 7.41 162 26 12 368 159 Medium 40 \n",
242+
"398 5.94 100 79 7 284 95 Bad 50 \n",
243+
"399 9.71 134 37 0 27 120 Good 49 \n",
244+
"\n",
245+
" Education Urban US \n",
246+
"0 17 Yes Yes \n",
247+
"1 10 Yes Yes \n",
248+
"2 12 Yes Yes \n",
249+
"3 14 Yes Yes \n",
250+
"4 13 Yes No \n",
251+
".. ... ... ... \n",
252+
"395 14 Yes Yes \n",
253+
"396 11 No Yes \n",
254+
"397 18 Yes Yes \n",
255+
"398 12 Yes Yes \n",
256+
"399 16 Yes Yes \n",
257+
"\n",
258+
"[400 rows x 11 columns]"
259+
]
260+
},
261+
"execution_count": 8,
262+
"metadata": {},
263+
"output_type": "execute_result"
264+
}
265+
],
266+
"source": [
267+
"dataset"
268+
]
269+
},
270+
{
271+
"cell_type": "code",
272+
"execution_count": 9,
273+
"metadata": {},
274+
"outputs": [],
275+
"source": [
276+
"# seprate categorical and numerical value\n",
277+
"categorical = [col for col in dataset.columns if dataset[col].dtype==object]\n",
278+
"numerical = [col for col in dataset.columns if dataset[col].dtype!=object]"
279+
]
280+
},
281+
{
282+
"cell_type": "code",
283+
"execution_count": 10,
284+
"metadata": {},
285+
"outputs": [
286+
{
287+
"name": "stdout",
288+
"output_type": "stream",
289+
"text": [
290+
" ShelveLoc Urban US\n",
291+
"0 0 0 0\n",
292+
"1 1 0 0\n",
293+
"2 2 0 0\n",
294+
"3 2 0 0\n",
295+
"4 0 0 1\n",
296+
".. ... ... ..\n",
297+
"395 1 0 0\n",
298+
"396 2 1 0\n",
299+
"397 2 0 0\n",
300+
"398 0 0 0\n",
301+
"399 1 0 0\n",
302+
"\n",
303+
"[400 rows x 3 columns]\n"
304+
]
305+
}
306+
],
307+
"source": [
308+
"# for convert categorical to int\n",
309+
"for col in categorical:\n",
310+
" temp = {}\n",
311+
" count = 0\n",
312+
" for val in dataset[col].values:\n",
313+
" try:\n",
314+
" temp[val]\n",
315+
" except:\n",
316+
" temp[val] = count\n",
317+
" count += 1\n",
318+
" dataset[col] = [temp[x] for x in dataset[col].values]\n",
319+
"print(dataset[categorical])"
320+
]
321+
},
322+
{
323+
"cell_type": "code",
324+
"execution_count": 11,
325+
"metadata": {},
326+
"outputs": [],
327+
"source": [
328+
"# print the dataset \n",
329+
"\n",
330+
"X = dataset.iloc[0:199].values \n",
331+
"y = dataset.iloc[200:399].values"
332+
]
333+
},
334+
{
335+
"cell_type": "code",
336+
"execution_count": 12,
337+
"metadata": {},
338+
"outputs": [
339+
{
340+
"data": {
341+
"text/plain": [
342+
"DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n",
343+
" max_features=None, max_leaf_nodes=None,\n",
344+
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
345+
" min_samples_leaf=1, min_samples_split=2,\n",
346+
" min_weight_fraction_leaf=0.0, presort='deprecated',\n",
347+
" random_state=0, splitter='best')"
348+
]
349+
},
350+
"execution_count": 12,
351+
"metadata": {},
352+
"output_type": "execute_result"
353+
}
354+
],
355+
"source": [
356+
"# create a DecisionTreeRegressor model (random_state fixed so results are reproducible) \n",
357+
"regressor = DecisionTreeRegressor(random_state = 0) \n",
358+
"\n",
359+
"# fit the regressor on X and y \n",
360+
"regressor.fit(X, y) "
361+
]
362+
}
363+
],
364+
"metadata": {
365+
"kernelspec": {
366+
"display_name": "Python 3",
367+
"language": "python",
368+
"name": "python3"
369+
},
370+
"language_info": {
371+
"codemirror_mode": {
372+
"name": "ipython",
373+
"version": 3
374+
},
375+
"file_extension": ".py",
376+
"mimetype": "text/x-python",
377+
"name": "python",
378+
"nbconvert_exporter": "python",
379+
"pygments_lexer": "ipython3",
380+
"version": "3.6.8"
381+
}
382+
},
383+
"nbformat": 4,
384+
"nbformat_minor": 4
385+
}

0 commit comments

Comments
 (0)