Commit a0ef319

Merge commit '5b2a33598de2cefebd3d3afbf16d5bf585eb090b' as 'contextual-utterance-level-multimodal-sentiment-analysis'
2 parents: 97e3f01 + 5b2a335

27 files changed: +18542 -0 lines
Lines changed: 59 additions & 0 deletions
# 🔥 🔥 🔥 Notice: This repository will no longer be maintained. Instead, we are moving all our multimodal works to this new centralized repository: [https://github.com/declare-lab/multimodal-deep-learning](https://github.com/declare-lab/multimodal-deep-learning).

# Context-Dependent Sentiment Analysis in User-Generated Videos

Code for the paper [Context-Dependent Sentiment Analysis in User-Generated Videos](http://sentic.net/context-dependent-sentiment-analysis-in-user-generated-videos.pdf) (ACL 2017).

## NOTE: An updated version of this code is available at https://github.com/soujanyaporia/multimodal-sentiment-analysis
### Requirements

The code is written in Python (2.7) and requires Keras (2.0.6) with the Theano backend.
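Keras selects its backend from `~/.keras/keras.json`, so make sure it points at Theano before running the code. The non-backend fields below are just the standard Keras 2 defaults, shown for completeness:

```
{
    "backend": "theano",
    "image_data_format": "channels_last",
    "floatx": "float32",
    "epsilon": 1e-07
}
```

Alternatively, the backend can be overridden for a single run with the `KERAS_BACKEND=theano` environment variable.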
### Description

In this paper, we propose an LSTM-based model that enables utterances to capture contextual information from the surrounding utterances in the same video, thereby aiding the classification process in multimodal sentiment analysis.

![Alt text](network.jpg?raw=true "Title")

This repository contains the code for the paper. Each contextual LSTM (Figure 2 in the paper) is implemented as shown in the figure above. For more details, please refer to the paper.

Note: Unlike the paper, we have not used an SVM on the penultimate layer. This keeps the whole network end-to-end differentiable, at some cost in performance.
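Purely as an illustration of the idea (the layer sizes, dropout rates, and masking setup below are assumptions, not the configuration used in the paper or in `lstm.py`), one contextual LSTM level over the padded utterance sequence of a video can be sketched in Keras like this:

```
# Illustrative sketch of one contextual LSTM level (Keras 2, Theano backend).
# All sizes and hyper-parameters here are assumptions, not the paper's values.
from keras.models import Model
from keras.layers import Input, Masking, LSTM, TimeDistributed, Dense, Dropout

max_utts = 63    # assumed maximum number of utterances per video (after padding)
feat_dim = 100   # assumed dimensionality of each utterance feature vector

utt_input = Input(shape=(max_utts, feat_dim))
# Mask the zero-padded utterances so they do not influence the recurrence.
masked = Masking(mask_value=0.0)(utt_input)
# The LSTM emits one context-aware representation per utterance.
contextual = LSTM(300, return_sequences=True, dropout=0.3)(masked)
contextual = Dropout(0.3)(contextual)
# Per-utterance sentiment prediction on top of the contextual representations.
utt_output = TimeDistributed(Dense(2, activation='softmax'))(contextual)

model = Model(utt_input, utt_output)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              sample_weight_mode='temporal')  # allows zero weight for padded utterances
```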
### Dataset

We provide results on the [MOSI dataset](https://arxiv.org/pdf/1606.06259.pdf). Please cite its creators if you use the dataset.
### Preprocessing

As the data is provided at the utterance level, we first combine all the utterances belonging to a video:

```
python create_data.py
```

Note: This will create speaker-independent train and test splits.
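For reference, the preprocessing script (included later in this commit) pickles a tuple of `(train_X, train_Y, test_X, test_Y, max_len, train_length, test_length)` per modality into `./input/`. Loading one of those pickles looks like this (written for the Python 2.7 environment stated above):

```
# Load one of the per-modality pickles produced by the preprocessing step (Python 2.7).
import pickle

with open('./input/text.pickle', 'rb') as handle:
    (train_data_X, train_data_Y, test_data_X, test_data_Y,
     max_len, train_length, test_length) = pickle.load(handle)

# train_data_X: (num_train_videos, max_len, feature_dim) padded utterance features
# train_length: true (unpadded) number of utterances per training video
print train_data_X.shape, test_data_X.shape, max_len
```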
### Running sc-lstm

Sample commands:

```
python lstm.py --unimodal True
python lstm.py --unimodal False
```

Note: Setting the unimodal flag to True (default: False) first trains all the unimodal LSTMs (level 1 of the network described in the paper).
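`lstm.py` defines its own argument handling; purely as an illustration of the pattern (the `str2bool` helper below is not taken from the repo), a True/False command-line flag of this kind is usually parsed along these lines, since argparse's `type=bool` would treat the string 'False' as truthy:

```
# Illustrative parsing of a --unimodal True/False flag (not the repo's actual code).
import argparse

def str2bool(value):
    # argparse passes the raw string, so 'False' must be mapped explicitly.
    return value.lower() in ('true', '1', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument('--unimodal', type=str2bool, default=False,
                    help='train the unimodal LSTMs (level 1) before the multimodal model')
args = parser.parse_args()

if args.unimodal:
    print "Training unimodal LSTMs first (level 1)"
else:
    print "Training the multimodal contextual LSTM (level 2)"
```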
### Citation

If you use this code, please cite our work:

```
@inproceedings{soujanyaacl17,
  title={Context-dependent sentiment analysis in user-generated videos},
  author={Poria, Soujanya and Cambria, Erik and Hazarika, Devamanyu and Mazumder, Navonil and Zadeh, Amir and Morency, Louis-Philippe},
  booktitle={Association for Computational Linguistics},
  year={2017}
}
```
### Credits

Devamanyu Hazarika, Soujanya Poria
Lines changed: 123 additions & 0 deletions
# Combine per-utterance features into padded per-video sequences for each modality
# (text, audio, video) and pickle speaker-independent train/test splits.
import numpy as np, pandas as pd
from collections import defaultdict
import pickle
from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()

# transcripts.csv holds one row per utterance, named "<video_id>_<utterance_no>".
pre_data = np.asarray(pd.read_csv("./data/transcripts.csv", header=None))

# text_train.csv / text_test.csv define the speaker-independent split; the first
# column is the utterance's row index into transcripts.csv.
train = np.asarray(pd.read_csv("./data/text_train.csv", header=None))
test = np.asarray(pd.read_csv("./data/text_test.csv", header=None))
train_index = np.asarray(train[:, 0], dtype='int')
test_index = np.asarray(test[:, 0], dtype='int')


def main(name):

    path = "./data/" + name + "/" + name
    print path
    train_video_mapping = defaultdict(list)
    train_video_mapping_index = defaultdict(list)
    test_video_mapping = defaultdict(list)
    test_video_mapping_index = defaultdict(list)

    # Per-modality feature matrices; the last column of each row is the label.
    data_train = np.asarray(pd.read_csv(path + "_train0.csv", header=None))
    data_test = np.asarray(pd.read_csv(path + "_test0.csv", header=None))

    # Group utterances by video id and remember each utterance's position in its video.
    for i in xrange(train_index.shape[0]):
        train_video_mapping[pre_data[train_index[i]][0].rsplit("_", 1)[0]].append(train_index[i])
        train_video_mapping_index[pre_data[train_index[i]][0].rsplit("_", 1)[0]].append(int(pre_data[train_index[i]][0].rsplit("_", 1)[1]))

    for i in xrange(test_index.shape[0]):
        test_video_mapping[pre_data[test_index[i]][0].rsplit("_", 1)[0]].append(test_index[i])
        test_video_mapping_index[pre_data[test_index[i]][0].rsplit("_", 1)[0]].append(int(pre_data[test_index[i]][0].rsplit("_", 1)[1]))

    # Map global utterance indices to row positions in data_train / data_test.
    train_indices = dict((c, i) for i, c in enumerate(train_index))
    test_indices = dict((c, i) for i, c in enumerate(test_index))

    # Maximum number of utterances in any video, used as the padded sequence length.
    max_len = 0
    for key, value in train_video_mapping.iteritems():
        max_len = max(max_len, len(value))
    for key, value in test_video_mapping.iteritems():
        max_len = max(max_len, len(value))

    # Zero vector used to pad shorter videos up to max_len utterances.
    pad = np.asarray([0 for i in xrange(data_train[0][:-1].shape[0])])

    print "Mapping train"

    train_data_X = []
    train_data_Y = []
    train_length = []
    for key, value in train_video_mapping.iteritems():

        # Sort the video's utterances by their position within the video.
        lst = np.column_stack((train_video_mapping_index[key], value))
        ind = np.asarray(sorted(lst, key=lambda x: x[0]))

        lst_X, lst_Y = [], []
        ctr = 0
        for i in xrange(ind.shape[0]):
            ctr += 1
            #lst_X.append(preprocessing.scale(min_max_scaler.fit_transform(data_train[train_indices[ind[i, 1]]][:-1])))
            lst_X.append(data_train[train_indices[ind[i, 1]]][:-1])
            lst_Y.append(data_train[train_indices[ind[i, 1]]][-1])
        train_length.append(ctr)
        for i in xrange(ctr, max_len):
            lst_X.append(pad)
            lst_Y.append(0)  # dummy label for padded utterances

        train_data_X.append(lst_X)
        train_data_Y.append(lst_Y)

    test_data_X = []
    test_data_Y = []
    test_length = []

    print "Mapping test"

    for key, value in test_video_mapping.iteritems():

        lst = np.column_stack((test_video_mapping_index[key], value))
        ind = np.asarray(sorted(lst, key=lambda x: x[0]))

        lst_X, lst_Y = [], []
        ctr = 0
        for i in xrange(ind.shape[0]):
            ctr += 1
            #lst_X.append(preprocessing.scale(min_max_scaler.transform(data_test[test_indices[ind[i, 1]]][:-1])))
            lst_X.append(data_test[test_indices[ind[i, 1]]][:-1])
            lst_Y.append(data_test[test_indices[ind[i, 1]]][-1])
        test_length.append(ctr)
        for i in xrange(ctr, max_len):
            lst_X.append(pad)
            lst_Y.append(0)  # dummy label for padded utterances

        test_data_X.append(np.asarray(lst_X))
        test_data_Y.append(np.asarray(lst_Y))

    train_data_X = np.asarray(train_data_X)
    test_data_X = np.asarray(test_data_X)
    print train_data_X.shape, test_data_X.shape, len(train_length), len(test_length)

    print "Dumping data"
    with open('./input/' + name + '.pickle', 'wb') as handle:
        pickle.dump((train_data_X, np.asarray(train_data_Y), test_data_X, np.asarray(test_data_Y), max_len, train_length, test_length), handle, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":

    names = ['text', 'audio', 'video']
    for nm in names:
        main(nm)
