diff --git a/Project/SentimentAnalyzer.ipynb b/Project/SentimentAnalyzer.ipynb index acad8d8..fdb3b0a 100644 --- a/Project/SentimentAnalyzer.ipynb +++ b/Project/SentimentAnalyzer.ipynb @@ -6,13 +6,342 @@ "metadata": {}, "outputs": [], "source": [ - "# You may start here" + "%pip install pandas" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd #import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"data.csv\", low_memory=False) #read the dataset into a dataframe to utilize the data in python." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install textblob" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from textblob import TextBlob #import TextBlob to process the text." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "#this function processes one review text and returns its TextBlob sentiment polarity.\n", + "def polarity(review):\n", + "    score = TextBlob(review)\n", + "    return score.sentiment.polarity" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"sentiment_score\"] = df[\"reviews.text\"].astype(str).apply(polarity) #score every review with the polarity function; astype(str) turns missing reviews into text first." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "def classifyer(score): #label a sentiment score as 'Good' (> 0.2), 'Bad' (< -0.2) or 'Neutral' (in between).\n", + "    if score > 0.2:\n", + "        return 'Good'\n", + "    elif score < -0.2:\n", + "        return 'Bad'\n", + "    elif score >= -0.2 and score <= 0.2:\n", + "        return 'Neutral'\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "df[\"sentimentAnalysis\"] = df[\"sentiment_score\"].apply(classifyer) #apply the classifyer function to sentiment_score and store the labels in a new column named sentimentAnalysis." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentiment_scoresentimentAnalysis
346350.059896Neutral
346360.275238Good
346370.228125Good
346380.292133Good
346390.035156Neutral
346400.136364Neutral
346410.135185Neutral
346420.225000Good
346430.643750Good
346440.250000Good
346450.503788Good
346460.125000Neutral
346470.533333Good
346480.550000Good
346490.050000Neutral
346500.125000Neutral
346510.000000Neutral
346520.056250Neutral
346530.178571Neutral
34654-0.068750Neutral
346550.183333Neutral
346560.000000Neutral
346570.183333Neutral
34658-0.100000Neutral
346590.412245Good
\n", + "
" + ], + "text/plain": [ + " sentiment_score sentimentAnalysis\n", + "34635 0.059896 Neutral\n", + "34636 0.275238 Good\n", + "34637 0.228125 Good\n", + "34638 0.292133 Good\n", + "34639 0.035156 Neutral\n", + "34640 0.136364 Neutral\n", + "34641 0.135185 Neutral\n", + "34642 0.225000 Good\n", + "34643 0.643750 Good\n", + "34644 0.250000 Good\n", + "34645 0.503788 Good\n", + "34646 0.125000 Neutral\n", + "34647 0.533333 Good\n", + "34648 0.550000 Good\n", + "34649 0.050000 Neutral\n", + "34650 0.125000 Neutral\n", + "34651 0.000000 Neutral\n", + "34652 0.056250 Neutral\n", + "34653 0.178571 Neutral\n", + "34654 -0.068750 Neutral\n", + "34655 0.183333 Neutral\n", + "34656 0.000000 Neutral\n", + "34657 0.183333 Neutral\n", + "34658 -0.100000 Neutral\n", + "34659 0.412245 Good" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[[\"sentiment_score\",\"sentimentAnalysis\"]].tail(25)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Answer: The result is based on the classifyer function, which labels a review Good if the sentiment score is over 0.2, Bad if the score is under -0.2, and Neutral in between.'" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#A description of the project.\n", + "\"\"\"Answer: Getting sentiment analysis of product reviews from a dataset and making sense of the data based on the polarity score of each one to classify them as good, bad or neutral. 
\"\"\"\n", + "#A description of the dataset used.\n", + "\"\"\"Answer: Dataset of products and their reviews.\"\"\"\n", + "#A description of the methods used to analyze the data.\n", + "\"\"\"Answer: A polarity function that processes the reviews to get the sentiment from them.\n", + " Then the classifyer function is used to classify each review based on the sentiment score as Good/Bad/Neutral.\"\"\"\n", + "#A description of the results.\n", + "\"\"\"Answer: The result is based on the classifyer function, which labels a review Good if the sentiment score is over 0.2, Bad if the score is under -0.2, and Neutral in between.\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" } }, "nbformat": 4,