Commit a0ef319

Merge commit '5b2a33598de2cefebd3d3afbf16d5bf585eb090b' as 'contextual-utterance-level-multimodal-sentiment-analysis'
2 parents: 97e3f01 + 5b2a335

27 files changed: +18542 -0 lines
Lines changed: 59 additions & 0 deletions
# 🔥 🔥 🔥 Notice: This repository will no longer be maintained. Instead, we are moving all our multimodal works to this new centralized repository: [https://github.com/declare-lab/multimodal-deep-learning](https://github.com/declare-lab/multimodal-deep-learning).

# Context-Dependent Sentiment Analysis in User-Generated Videos

Code for the paper [Context-Dependent Sentiment Analysis in User-Generated Videos](http://sentic.net/context-dependent-sentiment-analysis-in-user-generated-videos.pdf) (ACL 2017).

## NOTE: An updated version of this code is available at https://github.com/soujanyaporia/multimodal-sentiment-analysis
### Requirements

The code is written in Python (2.7) and requires Keras (2.0.6) with the Theano backend.
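Keras selects its backend from `~/.keras/keras.json`, so make sure it points at Theano before running the code. The non-backend fields below are just the standard Keras 2 defaults, shown for completeness:

```
{
    "backend": "theano",
    "image_data_format": "channels_last",
    "floatx": "float32",
    "epsilon": 1e-07
}
```

Alternatively, the backend can be overridden for a single run with the `KERAS_BACKEND=theano` environment variable.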
### Description

In this paper, we propose an LSTM-based model that enables utterances to capture contextual information from the surrounding utterances in the same video, thereby aiding the classification process in multimodal sentiment analysis.

![Alt text](network.jpg?raw=true "Title")

This repository contains the code for the paper. Each contextual LSTM (Figure 2 in the paper) is implemented as shown in the figure above. For more details, please refer to the paper.

Note: Unlike the paper, we have not used an SVM on the penultimate layer. This keeps the whole network end-to-end differentiable, at some cost in performance.
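Purely as an illustration of the idea (the layer sizes, dropout rates, and masking setup below are assumptions, not the configuration used in the paper or in `lstm.py`), one contextual LSTM level over the padded utterance sequence of a video can be sketched in Keras like this:

```
# Illustrative sketch of one contextual LSTM level (Keras 2, Theano backend).
# All sizes and hyper-parameters here are assumptions, not the paper's values.
from keras.models import Model
from keras.layers import Input, Masking, LSTM, TimeDistributed, Dense, Dropout

max_utts = 63    # assumed maximum number of utterances per video (after padding)
feat_dim = 100   # assumed dimensionality of each utterance feature vector

utt_input = Input(shape=(max_utts, feat_dim))
# Mask the zero-padded utterances so they do not influence the recurrence.
masked = Masking(mask_value=0.0)(utt_input)
# The LSTM emits one context-aware representation per utterance.
contextual = LSTM(300, return_sequences=True, dropout=0.3)(masked)
contextual = Dropout(0.3)(contextual)
# Per-utterance sentiment prediction on top of the contextual representations.
utt_output = TimeDistributed(Dense(2, activation='softmax'))(contextual)

model = Model(utt_input, utt_output)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              sample_weight_mode='temporal')  # allows zero weight for padded utterances
```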
### Dataset

We provide results on the [MOSI dataset](https://arxiv.org/pdf/1606.06259.pdf). Please cite its creators if you use the dataset.
### Preprocessing

As the data is provided at the utterance level, we first combine all the utterances belonging to a video:

```
python create_data.py
```

Note: This will create speaker-independent train and test splits.
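For reference, the preprocessing script (included later in this commit) pickles a tuple of `(train_X, train_Y, test_X, test_Y, max_len, train_length, test_length)` per modality into `./input/`. Loading one of those pickles looks like this (written for the Python 2.7 environment stated above):

```
# Load one of the per-modality pickles produced by the preprocessing step (Python 2.7).
import pickle

with open('./input/text.pickle', 'rb') as handle:
    (train_data_X, train_data_Y, test_data_X, test_data_Y,
     max_len, train_length, test_length) = pickle.load(handle)

# train_data_X: (num_train_videos, max_len, feature_dim) padded utterance features
# train_length: true (unpadded) number of utterances per training video
print train_data_X.shape, test_data_X.shape, max_len
```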
### Running sc-lstm

Sample commands:

```
python lstm.py --unimodal True
python lstm.py --unimodal False
```

Note: Setting the unimodal flag to True (default: False) first trains all the unimodal LSTMs (level 1 of the network described in the paper).
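`lstm.py` defines its own argument handling; purely as an illustration of the pattern (the `str2bool` helper below is not taken from the repo), a True/False command-line flag of this kind is usually parsed along these lines, since argparse's `type=bool` would treat the string 'False' as truthy:

```
# Illustrative parsing of a --unimodal True/False flag (not the repo's actual code).
import argparse

def str2bool(value):
    # argparse passes the raw string, so 'False' must be mapped explicitly.
    return value.lower() in ('true', '1', 'yes')

parser = argparse.ArgumentParser()
parser.add_argument('--unimodal', type=str2bool, default=False,
                    help='train the unimodal LSTMs (level 1) before the multimodal model')
args = parser.parse_args()

if args.unimodal:
    print "Training unimodal LSTMs first (level 1)"
else:
    print "Training the multimodal contextual LSTM (level 2)"
```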
### Citation

If you use this code, please cite our work:

```
@inproceedings{soujanyaacl17,
  title={Context-dependent sentiment analysis in user-generated videos},
  author={Poria, Soujanya and Cambria, Erik and Hazarika, Devamanyu and Mazumder, Navonil and Zadeh, Amir and Morency, Louis-Philippe},
  booktitle={Association for Computational Linguistics},
  year={2017}
}
```
### Credits

Devamanyu Hazarika, Soujanya Poria
Lines changed: 123 additions & 0 deletions
# Combine per-utterance features into padded per-video sequences for each modality
# (text, audio, video) and pickle speaker-independent train/test splits.
import numpy as np, pandas as pd
from collections import defaultdict
import pickle
from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()

# transcripts.csv holds one row per utterance, named "<video_id>_<utterance_no>".
pre_data = np.asarray(pd.read_csv("./data/transcripts.csv", header=None))

# text_train.csv / text_test.csv define the speaker-independent split; the first
# column is the utterance's row index into transcripts.csv.
train = np.asarray(pd.read_csv("./data/text_train.csv", header=None))
test = np.asarray(pd.read_csv("./data/text_test.csv", header=None))
train_index = np.asarray(train[:, 0], dtype='int')
test_index = np.asarray(test[:, 0], dtype='int')


def main(name):

    path = "./data/" + name + "/" + name
    print path
    train_video_mapping = defaultdict(list)
    train_video_mapping_index = defaultdict(list)
    test_video_mapping = defaultdict(list)
    test_video_mapping_index = defaultdict(list)

    # Per-modality feature matrices; the last column of each row is the label.
    data_train = np.asarray(pd.read_csv(path + "_train0.csv", header=None))
    data_test = np.asarray(pd.read_csv(path + "_test0.csv", header=None))

    # Group utterances by video id and remember each utterance's position in its video.
    for i in xrange(train_index.shape[0]):
        train_video_mapping[pre_data[train_index[i]][0].rsplit("_", 1)[0]].append(train_index[i])
        train_video_mapping_index[pre_data[train_index[i]][0].rsplit("_", 1)[0]].append(int(pre_data[train_index[i]][0].rsplit("_", 1)[1]))

    for i in xrange(test_index.shape[0]):
        test_video_mapping[pre_data[test_index[i]][0].rsplit("_", 1)[0]].append(test_index[i])
        test_video_mapping_index[pre_data[test_index[i]][0].rsplit("_", 1)[0]].append(int(pre_data[test_index[i]][0].rsplit("_", 1)[1]))

    # Map global utterance indices to row positions in data_train / data_test.
    train_indices = dict((c, i) for i, c in enumerate(train_index))
    test_indices = dict((c, i) for i, c in enumerate(test_index))

    # Maximum number of utterances in any video, used as the padded sequence length.
    max_len = 0
    for key, value in train_video_mapping.iteritems():
        max_len = max(max_len, len(value))
    for key, value in test_video_mapping.iteritems():
        max_len = max(max_len, len(value))

    # Zero vector used to pad shorter videos up to max_len utterances.
    pad = np.asarray([0 for i in xrange(data_train[0][:-1].shape[0])])

    print "Mapping train"

    train_data_X = []
    train_data_Y = []
    train_length = []
    for key, value in train_video_mapping.iteritems():

        # Sort the video's utterances by their position within the video.
        lst = np.column_stack((train_video_mapping_index[key], value))
        ind = np.asarray(sorted(lst, key=lambda x: x[0]))

        lst_X, lst_Y = [], []
        ctr = 0
        for i in xrange(ind.shape[0]):
            ctr += 1
            #lst_X.append(preprocessing.scale(min_max_scaler.fit_transform(data_train[train_indices[ind[i, 1]]][:-1])))
            lst_X.append(data_train[train_indices[ind[i, 1]]][:-1])
            lst_Y.append(data_train[train_indices[ind[i, 1]]][-1])
        train_length.append(ctr)
        for i in xrange(ctr, max_len):
            lst_X.append(pad)
            lst_Y.append(0)  # dummy label for padded utterances

        train_data_X.append(lst_X)
        train_data_Y.append(lst_Y)

    test_data_X = []
    test_data_Y = []
    test_length = []

    print "Mapping test"

    for key, value in test_video_mapping.iteritems():

        lst = np.column_stack((test_video_mapping_index[key], value))
        ind = np.asarray(sorted(lst, key=lambda x: x[0]))

        lst_X, lst_Y = [], []
        ctr = 0
        for i in xrange(ind.shape[0]):
            ctr += 1
            #lst_X.append(preprocessing.scale(min_max_scaler.transform(data_test[test_indices[ind[i, 1]]][:-1])))
            lst_X.append(data_test[test_indices[ind[i, 1]]][:-1])
            lst_Y.append(data_test[test_indices[ind[i, 1]]][-1])
        test_length.append(ctr)
        for i in xrange(ctr, max_len):
            lst_X.append(pad)
            lst_Y.append(0)  # dummy label for padded utterances

        test_data_X.append(np.asarray(lst_X))
        test_data_Y.append(np.asarray(lst_Y))

    train_data_X = np.asarray(train_data_X)
    test_data_X = np.asarray(test_data_X)
    print train_data_X.shape, test_data_X.shape, len(train_length), len(test_length)

    print "Dumping data"
    with open('./input/' + name + '.pickle', 'wb') as handle:
        pickle.dump((train_data_X, np.asarray(train_data_Y), test_data_X, np.asarray(test_data_Y), max_len, train_length, test_length), handle, protocol=pickle.HIGHEST_PROTOCOL)


if __name__ == "__main__":

    names = ['text', 'audio', 'video']
    for nm in names:
        main(nm)
