Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
sonali9696 authored Apr 3, 2017
1 parent 65dd305 commit 8e607b1
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 43 deletions.
97 changes: 60 additions & 37 deletions movieRatings_UI_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,31 @@
from scipy import optimize

# Default dimensions of the ratings matrix built from ml-100k/u.data.
# num_movies is the number of rows (one per movie id).
num_movies = 1682
# Starting user count; recommenderSystem() replaces it with the value read
# from temp.data before the ratings matrix is allocated.
num_users = 943

class MovieRatings(QtGui.QMainWindow):
def __init__(self, parent=None):
    """Build the ratings window and resolve the logged-in user's numeric id.

    The current user name is read from ``session.data`` and looked up in
    ``passwords.data`` (tab-separated columns: user name, password,
    system-generated user id).  The matching id is stored on
    ``self.newuser_id`` before the signal connections, the movie list and
    the initial ratings vector are set up.

    Args:
        parent: optional parent widget, forwarded to the Qt base class.

    Raises:
        ValueError: if the session user has no row in ``passwords.data``.
    """
    # Initialise the direct base class (the class derives from QMainWindow).
    QtGui.QMainWindow.__init__(self, parent)
    self.ui = Ui_MainWindow()
    self.ui.setupUi(self)
    self.initUI()

    # Name of the user who is currently logged in (first row of session.data).
    c_cols = ['current_user']
    current_user_data = pd.read_csv('session.data', sep='\t', names=c_cols,
                                    encoding='latin-1')
    name = current_user_data['current_user'][0]

    # '1user_id' is the user name, '3user_id' is the system-generated id.
    p_cols = ['1user_id', '2Password', '3user_id']
    passwords_data = pd.read_csv('passwords.data', sep='\t', names=p_cols,
                                 encoding='latin-1')

    # Take the id of the first row whose user name matches the session user.
    matching_ids = passwords_data.loc[passwords_data['1user_id'] == name,
                                      '3user_id']
    if matching_ids.empty:
        # Without this check self.newuser_id would stay unset and the first
        # later access would fail with an unrelated AttributeError.
        raise ValueError('user %r not found in passwords.data' % (name,))
    self.newuser_id = matching_ids.iloc[0]

    print("newuser_id= %s" % self.newuser_id)
    self.ui.save_next_Button.clicked.connect(self.back)
    self.connections()
    self.movies()
    self.newRatings()

def initUI(self):
self.setWindowTitle('Movie Ratings')
Expand All @@ -41,43 +50,43 @@ def appendNewRatings(self):
self.ratings_data = pd.read_csv('ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1')


d = {'1user_id': [self.newuser_id], '2movie_id': [1], '3rating': [self.newuser_ratings[0]] , '4unix_timestamp':[800000000]}
d = {'1user_id': [self.newuser_id], '2movie_id': [1], '3rating': self.newuser_ratings[0] , '4unix_timestamp':[800000000]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [50], '3rating': [self.newuser_ratings[49]] , '4unix_timestamp':[800000001]}
d = {'1user_id': [self.newuser_id], '2movie_id': [50], '3rating': self.newuser_ratings[49] , '4unix_timestamp':[800000001]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [71], '3rating': [self.newuser_ratings[70]] , '4unix_timestamp':[800000002]}
d = {'1user_id': [self.newuser_id], '2movie_id': [71], '3rating': self.newuser_ratings[70] , '4unix_timestamp':[800000002]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [64], '3rating': [self.newuser_ratings[63]] , '4unix_timestamp':[800000003]}
d = {'1user_id': [self.newuser_id], '2movie_id': [64], '3rating': self.newuser_ratings[63] , '4unix_timestamp':[800000003]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [69], '3rating': [self.newuser_ratings[68]] , '4unix_timestamp':[800000004]}
d = {'1user_id': [self.newuser_id], '2movie_id': [69], '3rating': self.newuser_ratings[68] , '4unix_timestamp':[800000004]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [72], '3rating': [self.newuser_ratings[71]] , '4unix_timestamp':[800000005]}
d = {'1user_id': [self.newuser_id], '2movie_id': [72], '3rating': self.newuser_ratings[71] , '4unix_timestamp':[800000005]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [82], '3rating': [self.newuser_ratings[81]] , '4unix_timestamp':[800000006]}
d = {'1user_id': [self.newuser_id], '2movie_id': [82], '3rating': self.newuser_ratings[81] , '4unix_timestamp':[800000006]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [88], '3rating': [self.newuser_ratings[87]] , '4unix_timestamp':[800000007]}
d = {'1user_id': [self.newuser_id], '2movie_id': [88], '3rating': self.newuser_ratings[87] , '4unix_timestamp':[800000007]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [94], '3rating': [self.newuser_ratings[93]] , '4unix_timestamp':[800000008]}
d = {'1user_id': [self.newuser_id], '2movie_id': [94], '3rating': self.newuser_ratings[93] , '4unix_timestamp':[800000008]}
df = pd.DataFrame(d)
df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)

d = {'1user_id': [self.newuser_id], '2movie_id': [98], '3rating': [self.newuser_ratings[97]] , '4unix_timestamp':[800000009]}
d = {'1user_id': [self.newuser_id], '2movie_id': [98], '3rating': self.newuser_ratings[97] , '4unix_timestamp':[800000009]}
df = pd.DataFrame(d)

df.to_csv('ml-100k/u.data',mode='a' ,sep='\t',index=False, header=False)
Expand Down Expand Up @@ -113,6 +122,7 @@ def sliderVal(self):
self.ui.lineEdit_18.setText(str(self.ui.horizontalSlider_8.value()))
self.ui.lineEdit_19.setText(str(self.ui.horizontalSlider_9.value()))
self.ui.lineEdit_20.setText(str(self.ui.horizontalSlider_10.value()))
self.newRatings()

def movies(self):
i_cols = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
Expand All @@ -133,30 +143,39 @@ def movies(self):
def newRatings(self):
    """Rebuild ``self.newuser_ratings`` from the ten rating sliders.

    The result is a ``(num_movies, 1)`` column of zeros in which the rows
    belonging to the ten rated movies hold the integer slider values.
    ``num_movies`` is the module-level movie count.
    """
    # (slider number, zero-based row).  The row is the movie id minus one,
    # matching the ids 1, 50, 71, 64, 69, 72, 82, 88, 94 and 98 that
    # appendNewRatings() writes to ml-100k/u.data.
    slider_rows = (
        (1, 0), (2, 49), (3, 70), (4, 63), (5, 68),
        (6, 71), (7, 81), (8, 87), (9, 93), (10, 97),
    )

    ratings = np.zeros((num_movies, 1))
    for slider_number, row in slider_rows:
        slider = getattr(self.ui, 'horizontalSlider_%d' % slider_number)
        ratings[row] = int(slider.value())
    self.newuser_ratings = ratings


def recommenderSystem(self):
global num_movies
global num_users
self.ratings = np.zeros((num_movies, num_users+1), dtype = np.uint8)

#update num_users
cols = ['count']
count_data = pd.read_csv('temp.data', sep='\t', names=cols, encoding='latin-1')
num_users = count_data['count'][0] - 1
print "num_users=",num_users

self.ratings = np.zeros((num_movies, num_users), dtype = np.uint8) #num_users updated
#Create 2D ratings matrix
for i in range(len(self.ratings_data)):
col = (int)(self.ratings_data['1user_id'][i])-1
row = (int)(self.ratings_data['2movie_id'][i])-1
self.ratings[row][col]=self.ratings_data['3rating'][i]
self.ratings[row][col]=(int)(self.ratings_data['3rating'][i])

self.did_rate = (self.ratings != 0) * 1

self.ratings, ratings_mean = normalize_ratings(self.ratings, self.did_rate)
self.ratings, ratings_mean = self.normalize_ratings()
num_users = self.ratings.shape[1] #num_users gets updated i.e. increases by 1
num_features = 3

Expand All @@ -165,24 +184,28 @@ def recommenderSystem(self):
initial_X_and_theta = np.r_[movie_features.T.flatten(), user_prefs.T.flatten()]

reg_param = 30
minimized_cost_and_optimal_params = optimize.fmin_cg(calculate_cost, fprime=calculate_gradient, x0=initial_X_and_theta, args=(ratings, did_rate, num_users, num_movies, num_features, reg_param), maxiter=100, disp=True, full_output=True )
minimized_cost_and_optimal_params = optimize.fmin_cg(self.calculate_cost, fprime=self.calculate_gradient, x0=initial_X_and_theta, args=(self.ratings, self.did_rate, num_users, num_movies, num_features, reg_param), maxiter=100, disp=True, full_output=True )
cost, optimal_movie_features_and_user_prefs = minimized_cost_and_optimal_params[1], minimized_cost_and_optimal_params[0]

movie_features, user_prefs = unroll_params(optimal_movie_features_and_user_prefs, num_users, num_movies, num_features)
movie_features, user_prefs = self.unroll_params(optimal_movie_features_and_user_prefs, num_users, num_movies, num_features)
# Make some predictions (movie recommendations). Dot product
all_predictions = movie_features.dot( user_prefs.T )
# add back the ratings_mean column vector to my (our) predictions
predictions_for_newuser = all_predictions[:, 0:1] + ratings_mean

i_cols = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
items = pd.read_csv('ml-100k/u.item', sep='|', names=i_cols,encoding='latin-1')
ind = np.argpartition(predictions_for_newuser, -1)[-5:]
for i in range(len(ind)):
ind2 = ratings_data['2movie_id'][i]
ind2 = self.ratings_data['2movie_id'][i]
#print items['movie title'][ind2]
d = { 'movie_title': [ items['movie title'][ind2] ] }
df = pd.DataFrame(d)
df.to_csv('ml-100k/movie_reco.data',mode='a' ,sep='\t',index=False, header=False)

def normalize_ratings(ratings, did_rate):
def normalize_ratings(self):
global num_movies
num_movies = self.ratings.shape[0]

Expand All @@ -198,7 +221,7 @@ def normalize_ratings(ratings, did_rate):

return ratings_norm, ratings_mean

def unroll_params(X_and_theta, num_users, num_movies, num_features):
def unroll_params(self, X_and_theta, num_users, num_movies, num_features):
# Retrieve the X and theta matrixes from X_and_theta, based on their dimensions (num_features, num_movies, num_movies)
# --------------------------------------------------------------------------------------------------------------
# Get the first 30 (10 * 3) rows in the 48 X 1 column vector
Expand All @@ -211,8 +234,8 @@ def unroll_params(X_and_theta, num_users, num_movies, num_features):
theta = last_18.reshape(num_features, num_users ).transpose()
return X, theta

def calculate_gradient(X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param):
X, theta = unroll_params(X_and_theta, num_users, num_movies, num_features)
def calculate_gradient(self, X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param):
X, theta = self.unroll_params(X_and_theta, num_users, num_movies, num_features)

# we multiply by did_rate because we only want to consider observations for which a rating was given
difference = X.dot( theta.T ) * did_rate - ratings
Expand All @@ -222,8 +245,8 @@ def calculate_gradient(X_and_theta, ratings, did_rate, num_users, num_movies, nu
# wrap the gradients back into a column vector
return np.r_[X_grad.T.flatten(), theta_grad.T.flatten()]

def calculate_cost(X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param):
X, theta = unroll_params(X_and_theta, num_users, num_movies, num_features)
def calculate_cost(self, X_and_theta, ratings, did_rate, num_users, num_movies, num_features, reg_param):
X, theta = self.unroll_params(X_and_theta, num_users, num_movies, num_features)
# we multiply (element-wise) by did_rate because we only want to consider observations for which a rating was given
cost = np.sum( (X.dot( theta.T ) * did_rate - ratings) ** 2 ) / 2
# '**' means an element-wise power
Expand Down
8 changes: 2 additions & 6 deletions passwords.data
Original file line number Diff line number Diff line change
@@ -1,6 +1,2 @@
asmita asmita10 943
sonali sonali10 944
aishwarya dance 945
yashu basketball 946
rohit code 947
boo booo 948
rohitSuri RoShur 944
sona bubbly 945
1 change: 1 addition & 0 deletions session.data
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sona
1 change: 1 addition & 0 deletions temp.data
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
946

0 comments on commit 8e607b1

Please sign in to comment.