From 8e607b1d97eb23311387f2e5688015e9750fb180 Mon Sep 17 00:00:00 2001 From: Sonali Agrawal Date: Mon, 3 Apr 2017 17:39:47 +0530 Subject: [PATCH] Add files via upload --- movieRatings_UI_run.py | 97 ++++++++++++++++++++++++++---------------- passwords.data | 8 +--- session.data | 1 + temp.data | 1 + 4 files changed, 64 insertions(+), 43 deletions(-) create mode 100644 session.data create mode 100644 temp.data diff --git a/movieRatings_UI_run.py b/movieRatings_UI_run.py index 8e32636..1e28019 100644 --- a/movieRatings_UI_run.py +++ b/movieRatings_UI_run.py @@ -8,7 +8,7 @@ from scipy import optimize num_movies = 1682 -num_users = 943 +num_users = 943 #updated by temp.data class MovieRatings(QtGui.QMainWindow): def __init__(self, parent=None): @@ -16,14 +16,23 @@ def __init__(self, parent=None): self.ui = Ui_MainWindow() self.ui.setupUi(self) self.initUI() - p_cols = ['user_id', 'Password'] + + #to get newuser_id + c_cols = ['current_user'] + current_user_data = pd.read_csv('session.data', sep='\t', names=c_cols, encoding='latin-1') + name = current_user_data['current_user'][0] + + p_cols = ['1user_id', '2Password', '3user_id'] #first user_id is user name, 3rd column is system generated passwords_data = pd.read_csv('passwords.data', sep='\t', names=p_cols, encoding='latin-1') - self.newuser_id = passwords_data['user_id'][0] + for i in range(len(passwords_data)): + if( passwords_data['1user_id'][i] == name ): + self.newuser_id = passwords_data['3user_id'][i] + break + print "newuser_id=",self.newuser_id self.ui.save_next_Button.clicked.connect(self.back) self.connections() self.movies() - self.newRatings() def initUI(self): self.setWindowTitle('Movie Ratings') @@ -41,43 +50,43 @@ def appendNewRatings(self): self.ratings_data = pd.read_csv('ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1') - d = {'1user_id': [self.newuser_id], '2movie_id': [1], '3rating': [self.newuser_ratings[0]] , '4unix_timestamp':[800000000]} + d = {'1user_id': [self.newuser_id], '2movie_id': [1], '3rating': self.newuser_ratings[0] , '4unix_timestamp':[800000000]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [50], '3rating': [self.newuser_ratings[49]] , '4unix_timestamp':[800000001]} + d = {'1user_id': [self.newuser_id], '2movie_id': [50], '3rating': self.newuser_ratings[49] , '4unix_timestamp':[800000001]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [71], '3rating': [self.newuser_ratings[70]] , '4unix_timestamp':[800000002]} + d = {'1user_id': [self.newuser_id], '2movie_id': [71], '3rating': self.newuser_ratings[70] , '4unix_timestamp':[800000002]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [64], '3rating': [self.newuser_ratings[63]] , '4unix_timestamp':[800000003]} + d = {'1user_id': [self.newuser_id], '2movie_id': [64], '3rating': self.newuser_ratings[63] , '4unix_timestamp':[800000003]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [69], '3rating': [self.newuser_ratings[68]] , '4unix_timestamp':[800000004]} + d = {'1user_id': [self.newuser_id], '2movie_id': [69], '3rating': self.newuser_ratings[68] , '4unix_timestamp':[800000004]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [72], '3rating': [self.newuser_ratings[71]] , '4unix_timestamp':[800000005]} + d = {'1user_id': [self.newuser_id], '2movie_id': [72], '3rating': self.newuser_ratings[71] , '4unix_timestamp':[800000005]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [82], '3rating': [self.newuser_ratings[81]] , '4unix_timestamp':[800000006]} + d = {'1user_id': [self.newuser_id], '2movie_id': [82], '3rating': self.newuser_ratings[81] , '4unix_timestamp':[800000006]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [88], '3rating': [self.newuser_ratings[87]] , '4unix_timestamp':[800000007]} + d = {'1user_id': [self.newuser_id], '2movie_id': [88], '3rating': self.newuser_ratings[87] , '4unix_timestamp':[800000007]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [94], '3rating': [self.newuser_ratings[93]] , '4unix_timestamp':[800000008]} + d = {'1user_id': [self.newuser_id], '2movie_id': [94], '3rating': self.newuser_ratings[93] , '4unix_timestamp':[800000008]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) - d = {'1user_id': [self.newuser_id], '2movie_id': [98], '3rating': [self.newuser_ratings[97]] , '4unix_timestamp':[800000009]} + d = {'1user_id': [self.newuser_id], '2movie_id': [98], '3rating': self.newuser_ratings[97] , '4unix_timestamp':[800000009]} df = pd.DataFrame(d) df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False) @@ -113,6 +122,7 @@ def sliderVal(self): self.ui.lineEdit_18.setText(str(self.ui.horizontalSlider_8.value())) self.ui.lineEdit_19.setText(str(self.ui.horizontalSlider_9.value())) self.ui.lineEdit_20.setText(str(self.ui.horizontalSlider_10.value())) + self.newRatings() def movies(self): i_cols = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure', @@ -133,30 +143,39 @@ def movies(self): def newRatings(self): global num_movies self.newuser_ratings = np.zeros((num_movies, 1)) - self.newuser_ratings[0] = self.ui.horizontalSlider_1.value() - self.newuser_ratings[49] = self.ui.horizontalSlider_2.value() - self.newuser_ratings[70] = self.ui.horizontalSlider_3.value() - self.newuser_ratings[63] = self.ui.horizontalSlider_4.value() - self.newuser_ratings[68] = self.ui.horizontalSlider_5.value() - self.newuser_ratings[71] = self.ui.horizontalSlider_6.value() - self.newuser_ratings[81] = self.ui.horizontalSlider_7.value() - self.newuser_ratings[87] = self.ui.horizontalSlider_8.value() - self.newuser_ratings[93] = self.ui.horizontalSlider_9.value() - self.newuser_ratings[97] = self.ui.horizontalSlider_10.value() - + self.newuser_ratings[0] = int(self.ui.horizontalSlider_1.value()) + self.newuser_ratings[97] = int(self.ui.horizontalSlider_10.value()) + self.newuser_ratings[49] = int(self.ui.horizontalSlider_2.value()) + self.newuser_ratings[70] = int(self.ui.horizontalSlider_3.value()) + self.newuser_ratings[63] = int(self.ui.horizontalSlider_4.value()) + self.newuser_ratings[68] = int(self.ui.horizontalSlider_5.value()) + self.newuser_ratings[71] = int(self.ui.horizontalSlider_6.value()) + self.newuser_ratings[81] = int(self.ui.horizontalSlider_7.value()) + self.newuser_ratings[87] = int(self.ui.horizontalSlider_8.value()) + self.newuser_ratings[93] = int(self.ui.horizontalSlider_9.value()) + self.newuser_ratings[97] = int(self.ui.horizontalSlider_10.value()) + def recommenderSystem(self): global num_movies global num_users - self.ratings = np.zeros((num_movies, num_users+1), dtype = np.uint8) + + #update num_users + cols = ['count'] + count_data = pd.read_csv('temp.data', sep='\t', names=cols, encoding='latin-1') + num_users = count_data['count'][0] - 1 + print "num_users=",num_users + + self.ratings = np.zeros((num_movies, num_users), dtype = np.uint8) #num_users updated #Create 2D ratings matrix for i in range(len(self.ratings_data)): col = (int)(self.ratings_data['1user_id'][i])-1 row = (int)(self.ratings_data['2movie_id'][i])-1 - self.ratings[row][col]=self.ratings_data['3rating'][i] + self.ratings[row][col]=(int)(self.ratings_data['3rating'][i]) + self.did_rate = (self.ratings != 0) * 1 - self.ratings, ratings_mean = normalize_ratings(self.ratings, self.did_rate) + self.ratings, ratings_mean = self.normalize_ratings() num_users = self.ratings.shape[1] #num_users gets updated i.e. increases by 1 num_features = 3 @@ -165,24 +184,28 @@ def recommenderSystem(self): initial_X_and_theta = np.r_[movie_features.T.flatten(), user_prefs.T.flatten()] reg_param = 30 - minimized_cost_and_optimal_params = optimize.fmin_cg(calculate_cost, fprime=calculate_gradient, x0=initial_X_and_theta, args=(ratings, did_rate, num_users, num_movies, num_features, reg_param), maxiter=100, disp=True, full_output=True ) + minimized_cost_and_optimal_params = optimize.fmin_cg(self.calculate_cost, fprime=self.calculate_gradient, x0=initial_X_and_theta, args=(self.ratings, self.did_rate, num_users, num_movies, num_features, reg_param), maxiter=100, disp=True, full_output=True ) cost, optimal_movie_features_and_user_prefs = minimized_cost_and_optimal_params[1], minimized_cost_and_optimal_params[0] - movie_features, user_prefs = unroll_params(optimal_movie_features_and_user_prefs, num_users, num_movies, num_features) + movie_features, user_prefs = self.unroll_params(optimal_movie_features_and_user_prefs, num_users, num_movies, num_features) # Make some predictions (movie recommendations). Dot product all_predictions = movie_features.dot( user_prefs.T ) # add back the ratings_mean column vector to my (our) predictions predictions_for_newuser = all_predictions[:, 0:1] + ratings_mean + i_cols = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure', + 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', + 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'] + items = pd.read_csv('ml-100k/u.item', sep='|', names=i_cols,encoding='latin-1') ind = np.argpartition(predictions_for_newuser, -1)[-5:] for i in range(len(ind)): - ind2 = ratings_data['2movie_id'][i] + ind2 = self.ratings_data['2movie_id'][i] #print items['movie title'][ind2] d = { 'movie_title': [ items['movie title'][ind2] ] } df = pd.DataFrame(d) df.to_csv('ml-100k/movie_reco.data',mode='a' ,sep='\t',index=False, header=False) - def normalize_ratings(ratings, did_rate): + def normalize_ratings(self): global num_movies num_movies = self.ratings.shape[0] @@ -198,7 +221,7 @@ def normalize_ratings(ratings, did_rate): return ratings_norm, ratings_mean - def unroll_params(X_and_theta, num_users, num_movies, num_features): + def unroll_params(self, X_and_theta, num_users, num_movies, num_features): # Retrieve the X and theta matrixes from X_and_theta, based on their dimensions (num_features, num_movies, num_movies) # -------------------------------------------------------------------------------------------------------------- # Get the first 30 (10 * 3) rows in the 48 X 1 column vector @@ -211,8 +234,8 @@ def unroll_params(X_and_theta, num_users, num_movies, num_features): theta = last_18.reshape(num_features, num_users ).transpose() return X, theta - def calculate_gradient(X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param): - X, theta = unroll_params(X_and_theta, num_users, num_movies, num_features) + def calculate_gradient(self, X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param): + X, theta = self.unroll_params(X_and_theta, num_users, num_movies, num_features) # we multiply by did_rate because we only want to consider observations for which a rating was given difference = X.dot( theta.T ) * did_rate - ratings @@ -222,8 +245,8 @@ def calculate_gradient(X_and_theta, ratings, did_rate, num_users, num_movies, nu # wrap the gradients back into a column vector return np.r_[X_grad.T.flatten(), theta_grad.T.flatten()] - def calculate_cost(X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param): - X, theta = unroll_params(X_and_theta, num_users, num_movies, num_features) + def calculate_cost(self, X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param): + X, theta = self.unroll_params(X_and_theta, num_users, num_movies, num_features) # we multiply (element-wise) by did_rate because we only want to consider observations for which a rating was given cost = np.sum( (X.dot( theta.T ) * did_rate - ratings) ** 2 ) / 2 # '**' means an element-wise power diff --git a/passwords.data b/passwords.data index 8d64214..cbfe895 100644 --- a/passwords.data +++ b/passwords.data @@ -1,6 +1,2 @@ -asmita asmita10 943 -sonali sonali10 944 -aishwarya dance 945 -yashu basketball 946 -rohit code 947 -boo booo 948 +rohitSuri RoShur 944 +sona bubbly 945 diff --git a/session.data b/session.data new file mode 100644 index 0000000..86ee042 --- /dev/null +++ b/session.data @@ -0,0 +1 @@ +sona diff --git a/temp.data b/temp.data new file mode 100644 index 0000000..8152c0d --- /dev/null +++ b/temp.data @@ -0,0 +1 @@ +946