diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 28d4df017..2a5773f89 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -96,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df.shape[0]\n" ] }, { @@ -114,7 +114,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df.shape[1]" ] }, { @@ -132,7 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "print(wine_df['class'].dtype, wine_df['class'].unique())" ] }, { @@ -151,7 +151,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df.shape[1] - 1" ] }, { @@ -204,7 +204,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "> For a uniform and fair comparison of the data in the same context" ] }, { @@ -220,7 +220,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "> Because this is the response class, which is what we want to find" ] }, { @@ -236,7 +236,7 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "> A seed value is used so that the sequence of random numbers is the same each time. The same seed value guarantees the same results each time. Any seed number is fine; no particular value is needed" ] }, { @@ -261,7 +261,10 @@ "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", "\n", - "# Your code here ..." + "A = predictors_standardized\n", + "B = wine_df['class']\n", + "A_train, A_test, B_train, B_test = train_test_split(A, B, test_size=0.25,stratify=wine_df['class'])\n", + "print(A_train.shape, A_test.shape)\n" ] }, { @@ -289,7 +292,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here..." 
+ "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.model_selection import GridSearchCV\n", + "knn = KNeighborsClassifier() \n", + "param_grid = {\"n_neighbors\": list(range(1, 51))}\n", + "grid_search = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy')\n", + "grid_search.fit(A_train, B_train)\n", + "best_k = grid_search.best_params_['n_neighbors']\n", + "print(f'Best k value:{best_k}')" ] }, { @@ -310,7 +320,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here..." + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.metrics import accuracy_score\n", + "final_knn = KNeighborsClassifier(n_neighbors=best_k)\n", + "final_knn.fit(A_train, B_train)\n", + "B_pred = final_knn.predict(A_test)\n", + "accuracy = accuracy_score(B_test, B_pred)\n", + "print(f'Test Set Accuracy: {accuracy}')" ] }, {