-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Movie Recommender Zeus #37
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
package nearsoft.academy.bigdata.recommendation; | ||
|
||
import java.io.*; | ||
import java.util.*; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Files; | ||
import java.nio.file.Paths; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
import org.apache.commons.collections4.BidiMap; | ||
import org.apache.commons.collections4.bidimap.DualHashBidiMap; | ||
import org.apache.mahout.cf.taste.common.TasteException; | ||
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; | ||
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; | ||
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; | ||
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; | ||
import org.apache.mahout.cf.taste.model.DataModel; | ||
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; | ||
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; | ||
import org.apache.mahout.cf.taste.similarity.UserSimilarity; | ||
|
||
public class MovieRecommender { | ||
|
||
private int totalReviews = 0; | ||
private int totalProducts = 0; | ||
private int totalUsers = 0; | ||
|
||
private Hashtable<String, Integer> users = new Hashtable<>(); | ||
private BidiMap<String, Integer> products = new DualHashBidiMap<>(); | ||
|
||
public MovieRecommender(String moviesPath) { | ||
try { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can have some of the logic in the constructor but not the entire functionality, also a code full of conditionals is a bad signal, try to clean up the code with polymorphism or at least use a switch statement |
||
|
||
File file = new File(moviesPath); | ||
FileReader fr = new FileReader(file); | ||
BufferedReader br = new BufferedReader(fr); | ||
|
||
File csvFile = new File("movies.csv"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. read these values from a property file to make your code more flexible and maintainable |
||
FileWriter fw = new FileWriter(csvFile); | ||
|
||
String line; | ||
|
||
String score = null; | ||
String productId = null; | ||
String userId = null; | ||
|
||
while((line = br.readLine()) != null){ | ||
if (line.startsWith("review/score")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use constants to avoid magic numbers and repeated string |
||
score = line.split(" ")[1]; | ||
totalReviews++; | ||
} | ||
else if (line.startsWith("product/productId")) { | ||
productId = line.split(" ")[1]; | ||
if (!products.containsKey(productId)) { | ||
totalProducts++; | ||
products.put(productId, totalProducts); | ||
} | ||
} | ||
else if (line.startsWith("review/userId")) { | ||
userId = line.split(" ")[1]; | ||
if (!users.containsKey(userId)) { | ||
totalUsers++; | ||
users.put(userId, totalUsers); | ||
} | ||
} | ||
if (productId != null && userId != null && score != null) { | ||
|
||
StringBuilder row = new StringBuilder(); | ||
row.append(String.valueOf(users.get(userId)) + "," | ||
+ String.valueOf(products.get(productId)) + "," | ||
+ score); | ||
row.append("\n"); | ||
fw.write(row.toString()); | ||
|
||
productId = null; | ||
userId = null; | ||
score = null; | ||
} | ||
} | ||
fr.close(); | ||
fw.close(); | ||
} | ||
catch(IOException e){ | ||
e.printStackTrace(); | ||
} | ||
} | ||
|
||
public int getTotalReviews(){ | ||
return totalReviews; | ||
} | ||
|
||
public int getTotalProducts(){ | ||
return totalProducts; | ||
} | ||
|
||
public int getTotalUsers(){ | ||
return totalUsers; | ||
} | ||
|
||
public List<String> getRecommendationsForUser(String userId) { | ||
|
||
List<String> recommendations = new ArrayList<>(); | ||
|
||
try { | ||
File datasetFile = new File("movies.csv"); | ||
DataModel model = new FileDataModel(datasetFile); | ||
UserSimilarity similarity = new PearsonCorrelationSimilarity(model); | ||
UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); | ||
UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); | ||
|
||
recommendations = recommender.recommend(users.get(userId), 3) | ||
.stream() | ||
.map(item -> products.getKey((int) item.getItemID())) | ||
.collect(Collectors.toList()); | ||
} | ||
catch (IOException | TasteException e) { | ||
e.printStackTrace(); | ||
} | ||
return recommendations; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,7 @@ public class MovieRecommenderTest { | |
public void testDataInfo() throws IOException, TasteException { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please give the tests meaningful names that state the intention of each test and what it is trying to show, and provide more context for the assertions; a test not only proves that your code works, it also serves as 'live' documentation. |
||
//download movies.txt.gz from | ||
// http://snap.stanford.edu/data/web-Movies.html | ||
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); | ||
MovieRecommender recommender = new MovieRecommender("./movies.txt"); | ||
assertEquals(7911684, recommender.getTotalReviews()); | ||
assertEquals(253059, recommender.getTotalProducts()); | ||
assertEquals(889176, recommender.getTotalUsers()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Redundant initialization: in Java, primitive fields are initialized by default, in this case to 0.