Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
343 changes: 340 additions & 3 deletions Project/SentimentAnalyzer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,354 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"\n",
"-A description of the project.\n",
" The goal of the project using dataset of prodect reviews and getting the Sentiment analysis of it.then categories each review if its (good, bad, neutral) .\n",
"\n",
"-A description of the dataset used.\n",
" a collection of product reviews\n",
"\n",
"-A description of the methods used to analyze the data.\n",
" import the library needed. then retrive the data from data.csv.after that create function callded getSentiment getting Sentiment from the data and call the function then categories score range.Last create column that contain categories for each review \n",
"\n",
"-A description of the results.\n",
" The result appears to you based on the data and classifications that you set. For example, the result appears as Good if the Score is greater than 0.1, otherwise as shown in the code.\n",
"\n",
"\n",
"\"\"\"\n",
"#import panda library\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"\n",
"#retrieve data from data.csv\n",
"df = pd.read_csv('data.csv',low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#import TextBlob library\n",
"from textblob import TextBlob"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# You may start here"
"#create function callded getSentiment getting Sentiment from the data \n",
"def getSentiment(data):\n",
" blob = TextBlob(data)\n",
" return blob.sentiment.polarity"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"#call getSentiment function and create column in data frame \n",
"df['sentiment'] = df['reviews.text'].astype(str).apply(getSentiment)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"#here we put score range for each sentiment review score (good , bad , Neutral ) \n",
"def categories(sentiment):\n",
" if sentiment >= 0.2:\n",
" return 'Good'\n",
" elif sentiment <= 0.2:\n",
" return 'Bad'\n",
" elif sentiment >= 0.2 and sentiment <= -0.2 :\n",
" return 'Neutral'\n",
" else:\n",
" return 'error'\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"#create column that contain categories for each review \n",
"df['Result'] = df['sentiment'].apply(categories)\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Result</th>\n",
" <th>sentiment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Good</td>\n",
" <td>0.325000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Good</td>\n",
" <td>0.800000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Good</td>\n",
" <td>0.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Good</td>\n",
" <td>0.374583</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Good</td>\n",
" <td>0.368056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Good</td>\n",
" <td>0.375000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Good</td>\n",
" <td>0.525000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Good</td>\n",
" <td>0.544444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Good</td>\n",
" <td>0.406667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Good</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Bad</td>\n",
" <td>-0.216667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Good</td>\n",
" <td>0.549206</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Good</td>\n",
" <td>0.278667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Neutral</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Good</td>\n",
" <td>0.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Good</td>\n",
" <td>0.416667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Good</td>\n",
" <td>0.766667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Bad</td>\n",
" <td>-0.196875</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Good</td>\n",
" <td>0.400000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Good</td>\n",
" <td>0.258333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Good</td>\n",
" <td>0.866667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Good</td>\n",
" <td>0.208333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Good</td>\n",
" <td>0.583333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Good</td>\n",
" <td>0.668750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>Good</td>\n",
" <td>0.526667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Good</td>\n",
" <td>0.266667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Good</td>\n",
" <td>0.825000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Good</td>\n",
" <td>0.300000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Good</td>\n",
" <td>0.375000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>Good</td>\n",
" <td>0.298667</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Result sentiment\n",
"0 Good 0.325000\n",
"1 Good 0.800000\n",
"2 Good 0.600000\n",
"3 Good 0.374583\n",
"4 Good 0.368056\n",
"5 Good 0.375000\n",
"6 Good 0.525000\n",
"7 Good 0.544444\n",
"8 Good 0.406667\n",
"9 Good 0.500000\n",
"10 Bad -0.216667\n",
"11 Good 0.549206\n",
"12 Good 0.278667\n",
"13 Neutral 0.000000\n",
"14 Good 0.250000\n",
"15 Good 0.416667\n",
"16 Good 0.766667\n",
"17 Bad -0.196875\n",
"18 Good 0.400000\n",
"19 Good 0.258333\n",
"20 Good 0.866667\n",
"21 Good 0.208333\n",
"22 Good 0.583333\n",
"23 Good 0.668750\n",
"24 Good 0.526667\n",
"25 Good 0.266667\n",
"26 Good 0.825000\n",
"27 Good 0.300000\n",
"28 Good 0.375000\n",
"29 Good 0.298667"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['Result', 'sentiment']].head(30)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.1"
}
},
"nbformat": 4,
Expand Down