From 6e70ce30c709b932e4d70a4eb8f8555ed3c62598 Mon Sep 17 00:00:00 2001 From: sayampradhan <112542130+sayampradhan@users.noreply.github.com> Date: Sat, 15 Oct 2022 01:18:56 +0530 Subject: [PATCH] Clean Code --- movie recommandation/main.py | 168 +++++++++++++++++++++-------------- 1 file changed, 103 insertions(+), 65 deletions(-) diff --git a/movie recommandation/main.py b/movie recommandation/main.py index 06da145..5e7cc34 100644 --- a/movie recommandation/main.py +++ b/movie recommandation/main.py @@ -10,18 +10,20 @@ import requests # load the nlp model and tfidf vectorizer from disk -filename = 'nlp_model.pkl' -clf = pickle.load(open(filename, 'rb')) -vectorizer = pickle.load(open('tranform.pkl','rb')) +filename = "nlp_model.pkl" +clf = pickle.load(open(filename, "rb")) +vectorizer = pickle.load(open("tranform.pkl", "rb")) + def create_similarity(): - data = pd.read_csv('main_data.csv') + data = pd.read_csv("main_data.csv") # creating a count matrix cv = CountVectorizer() - count_matrix = cv.fit_transform(data['comb']) + count_matrix = cv.fit_transform(data["comb"]) # creating a similarity score matrix similarity = cosine_similarity(count_matrix) - return data,similarity + return data, similarity + def rcmd(m): m = m.lower() @@ -30,70 +32,76 @@ def rcmd(m): similarity.shape except: data, similarity = create_similarity() - if m not in data['movie_title'].unique(): - return('Sorry! The movie that you have requested is not in our database. Please check the spelling or try with some other movies') + if m not in data["movie_title"].unique(): + return "Sorry! The movie that you have requested is not in our database. Please check the spelling or try with some other movies" else: - i = data.loc[data['movie_title']==m].index[0] + i = data.loc[data["movie_title"] == m].index[0] lst = list(enumerate(similarity[i])) - lst = sorted(lst, key = lambda x:x[1] ,reverse=True) - lst = lst[1:11] # excluding first item since it is the requested movie itself + lst = sorted(lst, key=lambda x: x[1], reverse=True) + lst = lst[1:11] # excluding first item since it is the requested movie itself l = [] for i in range(len(lst)): a = lst[i][0] - l.append(data['movie_title'][a]) + l.append(data["movie_title"][a]) return l - + + # converting list of string to list (eg. "["abc","def"]" to ["abc","def"]) def convert_to_list(my_list): my_list = my_list.split('","') - my_list[0] = my_list[0].replace('["','') - my_list[-1] = my_list[-1].replace('"]','') + my_list[0] = my_list[0].replace('["', "") + my_list[-1] = my_list[-1].replace('"]', "") return my_list + def get_suggestions(): - data = pd.read_csv('main_data.csv') - return list(data['movie_title'].str.capitalize()) + data = pd.read_csv("main_data.csv") + return list(data["movie_title"].str.capitalize()) + app = Flask(__name__) + @app.route("/") @app.route("/home") def home(): suggestions = get_suggestions() - return render_template('home.html',suggestions=suggestions) + return render_template("home.html", suggestions=suggestions) -@app.route("/similarity",methods=["POST"]) + +@app.route("/similarity", methods=["POST"]) def similarity(): - movie = request.form['name'] + movie = request.form["name"] rc = rcmd(movie) - if type(rc)==type('string'): + if type(rc) == type("string"): return rc else: - m_str="---".join(rc) + m_str = "---".join(rc) return m_str -@app.route("/recommend",methods=["POST"]) + +@app.route("/recommend", methods=["POST"]) def recommend(): # getting data from AJAX request - title = request.form['title'] - cast_ids = request.form['cast_ids'] - cast_names = request.form['cast_names'] - cast_chars = request.form['cast_chars'] - cast_bdays = request.form['cast_bdays'] - cast_bios = request.form['cast_bios'] - cast_places = request.form['cast_places'] - cast_profiles = request.form['cast_profiles'] - imdb_id = request.form['imdb_id'] - poster = request.form['poster'] - genres = request.form['genres'] - overview = request.form['overview'] - vote_average = request.form['rating'] - vote_count = request.form['vote_count'] - release_date = request.form['release_date'] - runtime = request.form['runtime'] - status = request.form['status'] - rec_movies = request.form['rec_movies'] - rec_posters = request.form['rec_posters'] + title = request.form["title"] + cast_ids = request.form["cast_ids"] + cast_names = request.form["cast_names"] + cast_chars = request.form["cast_chars"] + cast_bdays = request.form["cast_bdays"] + cast_bios = request.form["cast_bios"] + cast_places = request.form["cast_places"] + cast_profiles = request.form["cast_profiles"] + imdb_id = request.form["imdb_id"] + poster = request.form["poster"] + genres = request.form["genres"] + overview = request.form["overview"] + vote_average = request.form["rating"] + vote_count = request.form["vote_count"] + release_date = request.form["release_date"] + runtime = request.form["runtime"] + status = request.form["status"] + rec_movies = request.form["rec_movies"] + rec_posters = request.form["rec_posters"] # get movie suggestions for auto complete suggestions = get_suggestions() @@ -107,30 +115,44 @@ def recommend(): cast_bdays = convert_to_list(cast_bdays) cast_bios = convert_to_list(cast_bios) cast_places = convert_to_list(cast_places) - + # convert string to list (eg. "[1,2,3]" to [1,2,3]) - cast_ids = cast_ids.split(',') - cast_ids[0] = cast_ids[0].replace("[","") - cast_ids[-1] = cast_ids[-1].replace("]","") - + cast_ids = cast_ids.split(",") + cast_ids[0] = cast_ids[0].replace("[", "") + cast_ids[-1] = cast_ids[-1].replace("]", "") + # rendering the string to python string for i in range(len(cast_bios)): - cast_bios[i] = cast_bios[i].replace(r'\n', '\n').replace(r'\"','\"') - + cast_bios[i] = cast_bios[i].replace(r"\n", "\n").replace(r"\"", '"') + # combining multiple lists as a dictionary which can be passed to the html file so that it can be processed easily and the order of information will be preserved movie_cards = {rec_posters[i]: rec_movies[i] for i in range(len(rec_posters))} - - casts = {cast_names[i]:[cast_ids[i], cast_chars[i], cast_profiles[i]] for i in range(len(cast_profiles))} - cast_details = {cast_names[i]:[cast_ids[i], cast_profiles[i], cast_bdays[i], cast_places[i], cast_bios[i]] for i in range(len(cast_places))} + casts = { + cast_names[i]: [cast_ids[i], cast_chars[i], cast_profiles[i]] + for i in range(len(cast_profiles)) + } + + cast_details = { + cast_names[i]: [ + cast_ids[i], + cast_profiles[i], + cast_bdays[i], + cast_places[i], + cast_bios[i], + ] + for i in range(len(cast_places)) + } # web scraping to get user reviews from IMDB site - sauce = urllib.request.urlopen('https://www.imdb.com/title/{}/reviews?ref_=tt_ov_rt'.format(imdb_id)).read() - soup = bs.BeautifulSoup(sauce,'lxml') - soup_result = soup.find_all("div",{"class":"text show-more__control"}) - - reviews_list = [] # list of reviews - reviews_status = [] # list of comments (good or bad) + sauce = urllib.request.urlopen( + "https://www.imdb.com/title/{}/reviews?ref_=tt_ov_rt".format(imdb_id) + ).read() + soup = bs.BeautifulSoup(sauce, "lxml") + soup_result = soup.find_all("div", {"class": "text show-more__control"}) + + reviews_list = [] # list of reviews + reviews_status = [] # list of comments (good or bad) for reviews in soup_result: if reviews.string: reviews_list.append(reviews.string) @@ -138,15 +160,31 @@ def recommend(): movie_review_list = np.array([reviews.string]) movie_vector = vectorizer.transform(movie_review_list) pred = clf.predict(movie_vector) - reviews_status.append('Good' if pred else 'Bad') + reviews_status.append("Good" if pred else "Bad") # combining reviews and comments into a dictionary - movie_reviews = {reviews_list[i]: reviews_status[i] for i in range(len(reviews_list))} + movie_reviews = { + reviews_list[i]: reviews_status[i] for i in range(len(reviews_list)) + } # passing all the data to the html file - return render_template('recommend.html',title=title,poster=poster,overview=overview,vote_average=vote_average, - vote_count=vote_count,release_date=release_date,runtime=runtime,status=status,genres=genres, - movie_cards=movie_cards,reviews=movie_reviews,casts=casts,cast_details=cast_details) - -if __name__ == '__main__': + return render_template( + "recommend.html", + title=title, + poster=poster, + overview=overview, + vote_average=vote_average, + vote_count=vote_count, + release_date=release_date, + runtime=runtime, + status=status, + genres=genres, + movie_cards=movie_cards, + reviews=movie_reviews, + casts=casts, + cast_details=cast_details, + ) + + +if __name__ == "__main__": app.run(debug=True)