"""
Sentiment analysis of product reviews (notebook: Project/SentimentAnalyzer.ipynb).

- Description of the project.
    The goal is to take a dataset of product reviews, compute a sentiment
    score for each review, and then categorize each review as Good, Bad,
    or Neutral.

- Description of the dataset used.
    A collection of product reviews, read from data.csv.

- Description of the methods used to analyze the data.
    Import the required libraries, then load the data from data.csv.
    Define getSentiment to score a review with TextBlob and apply it to
    every review. Define the score range of each category, then add a
    column holding the category of each review.

- Description of the results.
    Each review is labelled from its score and the ranges set in
    categories(): 'Good' when the score is 0.2 or higher, 'Bad' when it is
    -0.2 or lower, and 'Neutral' in between.
"""

# %% Imports
# All imports live in one cell so that Restart Kernel -> Run All works.
import pandas as pd
from textblob import TextBlob

# %% Configuration
# Scores at or above +SENTIMENT_THRESHOLD are 'Good'; scores at or below
# -SENTIMENT_THRESHOLD are 'Bad'; anything strictly in between is 'Neutral'.
SENTIMENT_THRESHOLD = 0.2

# %% Load data
# Retrieve the reviews from data.csv.
df = pd.read_csv('data.csv', low_memory=False)


# %% Sentiment scoring
def getSentiment(data):
    """Return the TextBlob polarity score of a single review.

    Args:
        data: The review text, as a string.

    Returns:
        ``TextBlob(data).sentiment.polarity`` (TextBlob documents polarity
        as a float in the range [-1.0, 1.0]).
    """
    return TextBlob(data).sentiment.polarity


# %%
# Score every review. astype(str) makes sure TextBlob always receives a
# string, even for cells that pandas did not parse as text.
df['sentiment'] = df['reviews.text'].astype(str).apply(getSentiment)


# %% Categorization
def categories(sentiment, threshold=SENTIMENT_THRESHOLD):
    """Map a sentiment score to 'Good', 'Bad' or 'Neutral'.

    Args:
        sentiment: Numeric sentiment score of one review.
        threshold: Distance from zero at which a score stops being
            'Neutral'. Defaults to SENTIMENT_THRESHOLD (0.2).

    Returns:
        'Good' if ``sentiment >= threshold``, 'Bad' if
        ``sentiment <= -threshold``, 'Neutral' if the score lies strictly
        between the two, and 'error' if no comparison holds (e.g. NaN).
    """
    if sentiment >= threshold:
        return 'Good'
    # The lower bound must be the *negative* threshold; comparing against
    # +threshold here would label every non-'Good' score as 'Bad'.
    if sentiment <= -threshold:
        return 'Bad'
    if -threshold < sentiment < threshold:
        return 'Neutral'
    # Only reachable when every comparison above is False (e.g. NaN).
    return 'error'


# %%
# Create the column that holds the category of each review.
df['Result'] = df['sentiment'].apply(categories)

# %% Results
# NOTE(review): the saved output table that follows (Result / sentiment) was
# rendered with an earlier definition of categories: 'Result' was computed at
# execution count 30, before categories was redefined at count 36. For
# example, it shows -0.196875 as 'Bad', which is 'Neutral' under the +/-0.2
# ranges above. Restart the kernel and run all cells to refresh it.
| \n", + " | Result | \n", + "sentiment | \n", + "
|---|---|---|
| 0 | \n", + "Good | \n", + "0.325000 | \n", + "
| 1 | \n", + "Good | \n", + "0.800000 | \n", + "
| 2 | \n", + "Good | \n", + "0.600000 | \n", + "
| 3 | \n", + "Good | \n", + "0.374583 | \n", + "
| 4 | \n", + "Good | \n", + "0.368056 | \n", + "
| 5 | \n", + "Good | \n", + "0.375000 | \n", + "
| 6 | \n", + "Good | \n", + "0.525000 | \n", + "
| 7 | \n", + "Good | \n", + "0.544444 | \n", + "
| 8 | \n", + "Good | \n", + "0.406667 | \n", + "
| 9 | \n", + "Good | \n", + "0.500000 | \n", + "
| 10 | \n", + "Bad | \n", + "-0.216667 | \n", + "
| 11 | \n", + "Good | \n", + "0.549206 | \n", + "
| 12 | \n", + "Good | \n", + "0.278667 | \n", + "
| 13 | \n", + "Neutral | \n", + "0.000000 | \n", + "
| 14 | \n", + "Good | \n", + "0.250000 | \n", + "
| 15 | \n", + "Good | \n", + "0.416667 | \n", + "
| 16 | \n", + "Good | \n", + "0.766667 | \n", + "
| 17 | \n", + "Bad | \n", + "-0.196875 | \n", + "
| 18 | \n", + "Good | \n", + "0.400000 | \n", + "
| 19 | \n", + "Good | \n", + "0.258333 | \n", + "
| 20 | \n", + "Good | \n", + "0.866667 | \n", + "
| 21 | \n", + "Good | \n", + "0.208333 | \n", + "
| 22 | \n", + "Good | \n", + "0.583333 | \n", + "
| 23 | \n", + "Good | \n", + "0.668750 | \n", + "
| 24 | \n", + "Good | \n", + "0.526667 | \n", + "
| 25 | \n", + "Good | \n", + "0.266667 | \n", + "
| 26 | \n", + "Good | \n", + "0.825000 | \n", + "
| 27 | \n", + "Good | \n", + "0.300000 | \n", + "
| 28 | \n", + "Good | \n", + "0.375000 | \n", + "
| 29 | \n", + "Good | \n", + "0.298667 | \n", + "