Skip to content

Commit 51844d9

Browse files
authored
Add files via upload
1 parent 44728da commit 51844d9

File tree

1 file changed

+179
-32
lines changed

1 file changed

+179
-32
lines changed

Sentiment Analysis.ipynb

+179-32
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
},
1414
{
1515
"cell_type": "code",
16-
"execution_count": 1,
16+
"execution_count": 2,
1717
"metadata": {},
1818
"outputs": [],
1919
"source": [
@@ -62,7 +62,7 @@
6262
},
6363
{
6464
"cell_type": "code",
65-
"execution_count": 8,
65+
"execution_count": 3,
6666
"metadata": {},
6767
"outputs": [],
6868
"source": [
@@ -93,7 +93,7 @@
9393
},
9494
{
9595
"cell_type": "code",
96-
"execution_count": 3,
96+
"execution_count": 4,
9797
"metadata": {},
9898
"outputs": [],
9999
"source": [
@@ -130,7 +130,7 @@
130130
},
131131
{
132132
"cell_type": "code",
133-
"execution_count": 4,
133+
"execution_count": 5,
134134
"metadata": {},
135135
"outputs": [],
136136
"source": [
@@ -159,7 +159,7 @@
159159
},
160160
{
161161
"cell_type": "code",
162-
"execution_count": 37,
162+
"execution_count": 6,
163163
"metadata": {},
164164
"outputs": [],
165165
"source": [
@@ -191,7 +191,7 @@
191191
},
192192
{
193193
"cell_type": "code",
194-
"execution_count": 9,
194+
"execution_count": 7,
195195
"metadata": {},
196196
"outputs": [
197197
{
@@ -372,7 +372,7 @@
372372
},
373373
{
374374
"cell_type": "code",
375-
"execution_count": 14,
375+
"execution_count": 8,
376376
"metadata": {},
377377
"outputs": [],
378378
"source": [
@@ -387,9 +387,42 @@
387387
},
388388
{
389389
"cell_type": "code",
390-
"execution_count": 16,
390+
"execution_count": 9,
391391
"metadata": {},
392-
"outputs": [],
392+
"outputs": [
393+
{
394+
"name": "stderr",
395+
"output_type": "stream",
396+
"text": [
397+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"http://www.amazon.com/gp/product/B00OSTKZWM?redirect=true&ref_=cm_cr_ryp_prd_ttl_sol_1\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
398+
" ' that document to Beautiful Soup.' % decoded_markup\n",
399+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"http://www.amazon.com/gp/product/B013YDFH3Y?redirect=true&ref_=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
400+
" ' that document to Beautiful Soup.' % decoded_markup\n",
401+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/gp/product/B00U8KSNB0/ref=cm_cr_ryp_prd_ttl_sol_22\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
402+
" ' that document to Beautiful Soup.' % decoded_markup\n",
403+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"http://www.amazon.com/gp/product/B00PEJQU9M?redirect=true&ref_=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
404+
" ' that document to Beautiful Soup.' % decoded_markup\n",
405+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/dp/B00K15KRV6/ref=cm_cr_ryp_prd_ttl_sol_22\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
406+
" ' that document to Beautiful Soup.' % decoded_markup\n",
407+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/gp/product/B00G197Q4M/ref=cm_cr_ryp_prd_ttl_sol_26\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
408+
" ' that document to Beautiful Soup.' % decoded_markup\n",
409+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"http://www.amazon.com/gp/product/B0193D539M?redirect=true&ref_=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
410+
" ' that document to Beautiful Soup.' % decoded_markup\n",
411+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/dp/B01BO6BYMQ/ref=cm_cr_ryp_prd_ttl_sol_1\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
412+
" ' that document to Beautiful Soup.' % decoded_markup\n",
413+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/gp/product/B00JEMZYM4/ref=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
414+
" ' that document to Beautiful Soup.' % decoded_markup\n",
415+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/dp/B00QF5QJR2/ref=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
416+
" ' that document to Beautiful Soup.' % decoded_markup\n",
417+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/dp/B01CJU9BBM/ref=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
418+
" ' that document to Beautiful Soup.' % decoded_markup\n",
419+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"https://www.amazon.com/gp/product/B00JFNDLRC/ref=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
420+
" ' that document to Beautiful Soup.' % decoded_markup\n",
421+
"/home/hitesh/Documents/sentiment_analysis/local/lib/python2.7/site-packages/bs4/__init__.py:282: UserWarning: \"http://www.amazon.com/gp/product/B00EZHM9JE?redirect=true&ref_=cm_cr_ryp_prd_ttl_sol_0\" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client like requests to get the document behind the URL, and feed that document to Beautiful Soup.\n",
422+
" ' that document to Beautiful Soup.' % decoded_markup\n"
423+
]
424+
}
425+
],
393426
"source": [
394427
" # Preprocess text data in training set and validation set\n",
395428
" x_train_cleaned = []\n",
@@ -415,7 +448,7 @@
415448
},
416449
{
417450
"cell_type": "code",
418-
"execution_count": 42,
451+
"execution_count": 10,
419452
"metadata": {},
420453
"outputs": [
421454
{
@@ -507,7 +540,7 @@
507540
},
508541
{
509542
"cell_type": "code",
510-
"execution_count": 40,
543+
"execution_count": 11,
511544
"metadata": {},
512545
"outputs": [
513546
{
@@ -529,7 +562,7 @@
529562
" verbose=0, warm_start=False)"
530563
]
531564
},
532-
"execution_count": 40,
565+
"execution_count": 11,
533566
"metadata": {},
534567
"output_type": "execute_result"
535568
}
@@ -633,24 +666,64 @@
633666
},
634667
{
635668
"cell_type": "code",
636-
"execution_count": 43,
669+
"execution_count": null,
637670
"metadata": {},
671+
"outputs": [],
672+
"source": [
673+
" #x_train_subset = tfidf.transform(x_train_cleaned[:100])\n",
674+
" x_train_input = tfidf.transform(x_train_cleaned)\n",
675+
" svr_lin = LinearSVC(multi_class='ovr',C=1.0,loss='squared_hinge', dual=False)\n",
676+
" svr_lin.fit(x_train_input, y_train)\n",
677+
" y_svr_lin_predicted = svr_lin.predict(tfidf.transform(x_test_cleaned))"
678+
]
679+
},
680+
{
681+
"cell_type": "code",
682+
"execution_count": 16,
683+
"metadata": {},
684+
"outputs": [],
685+
"source": [
686+
" modelEvaluation(y_svr_lin_predicted, y_test)"
687+
]
688+
},
689+
{
690+
"cell_type": "markdown",
691+
"metadata": {},
692+
"source": [
693+
"### Functions for Model Evaluation\n",
694+
"\n",
695+
"scikit-learn provides several functions for model evaluation. To learn more about them, see the links below:\n",
696+
"- [accuracy score](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html#sklearn.metrics.accuracy_score)\n",
697+
"- [precision, recall, F-score and support](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_recall_fscore_support.html)\n",
698+
"- [f1_score](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score)\n",
699+
"- [confusion matrix](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html#sklearn.metrics.confusion_matrix)"
700+
]
701+
},
702+
{
703+
"cell_type": "code",
704+
"execution_count": 23,
705+
"metadata": {
706+
"scrolled": true
707+
},
638708
"outputs": [
639709
{
640710
"name": "stdout",
641711
"output_type": "stream",
642712
"text": [
643-
"Accuracy of this SVM = 0.8917012568575658\n"
713+
"Accuracy of this SVM = 0.9409305970799106\n",
714+
"Fscore of this SVM = (0.9412812101129703, 0.9409305970799106, 0.9384909185837339, None)\n",
715+
"F-1 score of this SVM = 0.9384909185837339\n",
716+
"confusion matrix = [[ 7477 33 628]\n",
717+
" [ 306 1775 655]\n",
718+
" [ 373 40 23164]]\n"
644719
]
645720
}
646721
],
647722
"source": [
648-
" #x_train_subset = tfidf.transform(x_train_cleaned[:100])\n",
649-
" x_train_input = tfidf.transform(x_train_cleaned)\n",
650-
" svr_lin = LinearSVC(multi_class='ovr',C=1.0,loss='squared_hinge', dual=False)\n",
651-
" svr_lin.fit(x_train_input, y_train)\n",
652-
" y_predicted = svr_lin.predict(tfidf.transform(x_test_cleaned))\n",
653-
" print \"Accuracy of this SVM = \" + str(metrics.accuracy_score(y_test, y_predicted))"
723+
" print \"Accuracy of this SVM = \" + str(metrics.accuracy_score(y_test, y_svr_lin_predicted))\n",
724+
" print \"Fscore of this SVM = \" + str(metrics.precision_recall_fscore_support(y_test, y_svr_lin_predicted, pos_label=2, average='weighted'))\n",
725+
" print \"F-1 score of this SVM = \" + str(metrics.f1_score(y_test, y_svr_lin_predicted, pos_label=2, average='weighted'))\n",
726+
" print \"confusion matrix = \" + str(metrics.confusion_matrix(y_test, y_svr_lin_predicted))"
654727
]
655728
},
656729
{
@@ -664,22 +737,47 @@
664737
},
665738
{
666739
"cell_type": "code",
667-
"execution_count": 44,
740+
"execution_count": null,
741+
"metadata": {},
742+
"outputs": [],
743+
"source": [
744+
" rand = RandomForestClassifier()\n",
745+
" rand.fit(x_train_input, y_train)\n",
746+
" y_rand_predicted = rand.predict(tfidf.transform(x_test_cleaned))"
747+
]
748+
},
749+
{
750+
"cell_type": "code",
751+
"execution_count": 25,
752+
"metadata": {},
753+
"outputs": [],
754+
"source": [
755+
" modelEvaluation(y_rand_predicted, y_test)"
756+
]
757+
},
758+
{
759+
"cell_type": "code",
760+
"execution_count": 26,
668761
"metadata": {},
669762
"outputs": [
670763
{
671764
"name": "stdout",
672765
"output_type": "stream",
673766
"text": [
674-
"Accuracy of Random Forest = 0.9395082871324489\n"
767+
"Accuracy of Random Forest = 0.9386665118574207\n",
768+
"Fscore of this SVM = (0.9391700836892655, 0.9386665118574207, 0.9362244481117618, None)\n",
769+
"F-1 score of this SVM = 0.9362244481117618\n",
770+
"confusion matrix = [[ 7486 33 619]\n",
771+
" [ 338 1759 639]\n",
772+
" [ 446 38 23093]]\n"
675773
]
676774
}
677775
],
678776
"source": [
679-
" rand = RandomForestClassifier()\n",
680-
" rand.fit(x_train_input, y_train)\n",
681-
" y_predicted = rand.predict(tfidf.transform(x_test_cleaned))\n",
682-
" print \"Accuracy of Random Forest = \" + str(rand.score(tfidf.transform(x_test_cleaned), y_test))"
777+
" print \"Accuracy of Random Forest = \" + str(rand.score(tfidf.transform(x_test_cleaned), y_test))\n",
778+
" print \"Fscore of this SVM = \" + str(metrics.precision_recall_fscore_support(y_test, y_predicted, pos_label=2, average='weighted'))\n",
779+
" print \"F-1 score of this SVM = \" + str(metrics.f1_score(y_test, y_predicted, pos_label=2, average='weighted'))\n",
780+
" print \"confusion matrix = \" + str(metrics.confusion_matrix(y_test, y_predicted))"
683781
]
684782
},
685783
{
@@ -693,22 +791,71 @@
693791
},
694792
{
695793
"cell_type": "code",
696-
"execution_count": 18,
794+
"execution_count": null,
795+
"metadata": {},
796+
"outputs": [],
797+
"source": [
798+
" decTree = DecisionTreeClassifier()\n",
799+
" decTree.fit(x_train_input, y_train)\n",
800+
" y_decTree_predicted = decTree.predict(tfidf.transform(x_test_cleaned))"
801+
]
802+
},
803+
{
804+
"cell_type": "code",
805+
"execution_count": 29,
697806
"metadata": {},
698807
"outputs": [
699808
{
700809
"name": "stdout",
701810
"output_type": "stream",
702811
"text": [
703-
"Accuracy of Decision Tree = 0.9263591768018344\n"
812+
"\n",
813+
"Accuracy on validation set: 0.9262\n",
814+
"\n",
815+
"Classification report : \n",
816+
" precision recall f1-score support\n",
817+
"\n",
818+
" 0 0.90 0.90 0.90 8138\n",
819+
" 1 0.78 0.70 0.73 2736\n",
820+
" 2 0.95 0.96 0.96 23577\n",
821+
"\n",
822+
"avg / total 0.92 0.93 0.93 34451\n",
823+
"\n",
824+
"\n",
825+
"Confusion Matrix : \n",
826+
"[[ 7291 244 603]\n",
827+
" [ 299 1902 535]\n",
828+
" [ 555 306 22716]]\n"
704829
]
705830
}
706831
],
707832
"source": [
708-
" decTree = DecisionTreeClassifier()\n",
709-
" decTree.fit(x_train_input, y_train)\n",
710-
" y_predicted = decTree.predict(tfidf.transform(x_test_cleaned))\n",
711-
" print \"Accuracy of Decision Tree = \" + str(decTree.score(tfidf.transform(x_test_cleaned), y_test))"
833+
" modelEvaluation(y_decTree_predicted, y_test)"
834+
]
835+
},
836+
{
837+
"cell_type": "code",
838+
"execution_count": 30,
839+
"metadata": {},
840+
"outputs": [
841+
{
842+
"name": "stdout",
843+
"output_type": "stream",
844+
"text": [
845+
"Accuracy of Decision Tree = 0.9262140431337261\n",
846+
"Fscore of this SVM = (0.9247698369985567, 0.9262140431337261, 0.9252945198875524, None)\n",
847+
"F-1 score of this SVM = 0.9252945198875524\n",
848+
"confusion matrix = [[ 7291 244 603]\n",
849+
" [ 299 1902 535]\n",
850+
" [ 555 306 22716]]\n"
851+
]
852+
}
853+
],
854+
"source": [
855+
" print \"Accuracy of Decision Tree = \" + str(decTree.score(tfidf.transform(x_test_cleaned), y_test))\n",
856+
" print \"Fscore of this SVM = \" + str(metrics.precision_recall_fscore_support(y_test, y_decTree_predicted, pos_label=2, average='weighted'))\n",
857+
" print \"F-1 score of this SVM = \" + str(metrics.f1_score(y_test, y_decTree_predicted, pos_label=2, average='weighted'))\n",
858+
" print \"confusion matrix = \" + str(metrics.confusion_matrix(y_test, y_decTree_predicted))"
712859
]
713860
}
714861
],

0 commit comments

Comments
 (0)