-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1layerlstm_dropout.py
88 lines (72 loc) · 2.72 KB
/
1layerlstm_dropout.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 5 21:20:55 2018
@author: avinash
"""
import re
import pickle
import numpy as np
import pandas as pd
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from sklearn.model_selection import train_test_split
from keras.preprocessing import sequence
import tensorflow as tf
import csv
# Load the preprocessed review scores; 'Score' holds the binary labels.
final=pd.read_csv('final.csv')
# total.csv holds one tokenized review per row — presumably integer token
# ids as strings; pad_sequences below coerces them. TODO confirm format.
with open('total.csv', 'rt', encoding="utf8") as t:
    reader = csv.reader(t)
    total=list(reader)
# Labels aligned row-for-row with `total` — verify final.csv / total.csv ordering.
fd = final['Score'].tolist()
#splitting train and test data in the ratio 80:20
# shuffle=False keeps the original row order, so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(total,fd,test_size=0.2,shuffle=False)
print("-----------------------TRAIN DATA------------------------------------")
print(len(x_train))
print(len(y_train))
print("---------------------------------------------------------------------")
print("\n-----------------------TEST DATA-------------------------------------")
print(len(x_test))
print(len(y_test))
#padding
# Pad/truncate every sequence to exactly 700 tokens so the Embedding layer
# (input_length=700 below) receives a fixed-width matrix.
X_train = sequence.pad_sequences(x_train, maxlen = 700)
X_test = sequence.pad_sequences(x_test, maxlen = 700)
print("-----------------------TRAIN DATA------------------------------------")
print(X_train.shape)
print(len(y_train))
print("---------------------------------------------------------------------")
print("\n-----------------------TEST DATA-------------------------------------")
print(X_test.shape)
print(len(y_test))
# Model: Embedding -> Dropout -> LSTM -> Dropout -> sigmoid Dense.
# Single-unit sigmoid output + binary_crossentropy => binary classification.
model = Sequential()
# Vocabulary capped at 41500 ids, 32-dim embeddings, fixed 700-token input
# (must match the pad_sequences maxlen above).
model.add(Embedding(41500, 32, input_length = 700))
model.add(Dropout(0.20))
model.add(LSTM(100))
model.add(Dropout(0.20))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# emitted a spurious trailing "None" line.
model.summary()
# Labels come out of train_test_split as plain lists; convert once to numpy
# arrays, which Keras expects (lists also work on old versions, so this is
# backward-compatible).
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
#fitting the model
history = model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_test, y_test))
#plotting train vs val loss
def Plot(err):
    """Plot training vs. validation loss per epoch.

    Parameters
    ----------
    err : Keras History object — its ``.history`` dict must contain the
        'loss' and 'val_loss' series recorded by ``model.fit``.
    """
    v_loss = err.history['val_loss']
    t_loss = err.history['loss']
    # Derive the epoch axis from the history itself instead of the previous
    # hard-coded range(1, 11): that silently broke (or crashed with a length
    # mismatch) whenever epochs != 10 in model.fit.
    x = list(range(1, len(t_loss) + 1))
    plt.plot(x, v_loss, '-b', label='Validation Loss')
    plt.plot(x, t_loss, '-r', label='Training Loss')
    plt.legend(loc='center right')
    plt.xlabel("EPOCHS",fontsize=15, color='black')
    plt.ylabel("Train Loss & Validation Loss",fontsize=15, color='black')
    plt.title("Train vs Validation Loss on Epoch's" ,fontsize=15, color='black')
    plt.show()
# Render the train/validation loss curves for the completed run.
Plot(history)
#calculating accuracy
# evaluate returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; index 1 is the accuracy on the held-out test set.
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))