diff --git a/Project/SentimentAnalyzer.ipynb b/Project/SentimentAnalyzer.ipynb index acad8d8..fdb3b0a 100644 --- a/Project/SentimentAnalyzer.ipynb +++ b/Project/SentimentAnalyzer.ipynb @@ -6,13 +6,342 @@ "metadata": {}, "outputs": [], "source": [ - "# You may start here" + "%pip install pandas" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd  # import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"data.csv\", low_memory=False)  # read the dataset into a DataFrame so it can be processed in Python." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install TextBlob" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob  # import TextBlob to process the text." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Return the TextBlob sentiment polarity score of a single review string.\n", + "def polarity(review):\n", + "    score = TextBlob(review)\n", + "    return score.sentiment.polarity" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"sentiment_score\"] = df[\"reviews.text\"].astype(str).apply(polarity)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "def classifyer(score):  # classify a sentiment score: > 0.2 is 'Good', < -0.2 is 'Bad', anything in [-0.2, 0.2] is 'Neutral'.\n", + "    if score > 0.2:\n", + "        return 'Good'\n", + "    elif score < -0.2:\n", + "        return 'Bad'\n", + "    elif score >= -0.2 and score <= 0.2:\n", + "        return 'Neutral'\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"sentimentAnalysis\"] = 
df[\"sentiment_score\"].apply(classifyer) #call and apply the classifyer function on a new column named sentimentAnalysis." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | sentiment_score | \n", + "sentimentAnalysis | \n", + "
|---|---|---|
| 34635 | \n", + "0.059896 | \n", + "Neutral | \n", + "
| 34636 | \n", + "0.275238 | \n", + "Good | \n", + "
| 34637 | \n", + "0.228125 | \n", + "Good | \n", + "
| 34638 | \n", + "0.292133 | \n", + "Good | \n", + "
| 34639 | \n", + "0.035156 | \n", + "Neutral | \n", + "
| 34640 | \n", + "0.136364 | \n", + "Neutral | \n", + "
| 34641 | \n", + "0.135185 | \n", + "Neutral | \n", + "
| 34642 | \n", + "0.225000 | \n", + "Good | \n", + "
| 34643 | \n", + "0.643750 | \n", + "Good | \n", + "
| 34644 | \n", + "0.250000 | \n", + "Good | \n", + "
| 34645 | \n", + "0.503788 | \n", + "Good | \n", + "
| 34646 | \n", + "0.125000 | \n", + "Neutral | \n", + "
| 34647 | \n", + "0.533333 | \n", + "Good | \n", + "
| 34648 | \n", + "0.550000 | \n", + "Good | \n", + "
| 34649 | \n", + "0.050000 | \n", + "Neutral | \n", + "
| 34650 | \n", + "0.125000 | \n", + "Neutral | \n", + "
| 34651 | \n", + "0.000000 | \n", + "Neutral | \n", + "
| 34652 | \n", + "0.056250 | \n", + "Neutral | \n", + "
| 34653 | \n", + "0.178571 | \n", + "Neutral | \n", + "
| 34654 | \n", + "-0.068750 | \n", + "Neutral | \n", + "
| 34655 | \n", + "0.183333 | \n", + "Neutral | \n", + "
| 34656 | \n", + "0.000000 | \n", + "Neutral | \n", + "
| 34657 | \n", + "0.183333 | \n", + "Neutral | \n", + "
| 34658 | \n", + "-0.100000 | \n", + "Neutral | \n", + "
| 34659 | \n", + "0.412245 | \n", + "Good | \n", + "