Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Movie Recommender Zeus #37

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,27 @@
<artifactId>big-data</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<build>
<plugins>
<plugin>
<version>3.1</version>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>17</source>
<target>17</target>
</configuration>
</plugin>
</plugins>
</build>

<name>big-data</name>
<url>http://maven.apache.org</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.target>17</maven.compiler.target>
<maven.compiler.source>17</maven.compiler.source>
</properties>

<dependencies>
Expand All @@ -26,5 +41,10 @@
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.4</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.*;
import java.util.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.collections4.BidiMap;
import org.apache.commons.collections4.bidimap.DualHashBidiMap;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

public class MovieRecommender {

private int totalReviews = 0;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Redundant initialization: in Java, primitive fields are initialized to their default value automatically (0 for an `int`), so the explicit `= 0` is unnecessary.

// Count of distinct product ids seen while parsing; each new product is assigned the current count as its numeric id.
private int totalProducts = 0;
// Count of distinct user ids seen while parsing; each new user is assigned the current count as its numeric id.
private int totalUsers = 0;

// Maps a user id string from the input file to the sequential integer assigned to it.
private Hashtable<String, Integer> users = new Hashtable<>();
// Two-way map between product id strings and their sequential integers; the reverse lookup
// translates recommended numeric item ids back into product id strings.
private BidiMap<String, Integer> products = new DualHashBidiMap<>();

/**
 * Parses the review file at {@code moviesPath} and writes a {@code movies.csv} file
 * (relative path) containing one {@code userIndex,productIndex,score} row per review.
 *
 * <p>While reading, the constructor counts every line starting with {@code review/score},
 * and assigns a sequential integer to each product id and user id the first time it is seen.
 * A row is written as soon as a product id, a user id and a score have all been collected,
 * after which the three values are reset for the next review.
 *
 * <p>An {@link IOException} is caught and its stack trace printed; it is not propagated to
 * the caller.
 *
 * @param moviesPath path of the text file to read line by line
 */
public MovieRecommender(String moviesPath) {
try {
Copy link

@linolarios linolarios Oct 26, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can have some of the logic in the constructor but not the entire functionality, also a code full of conditionals is a bad signal, try to clean up the code with polymorphism or at least use a switch statement


// NOTE(review): FileReader(File) does not name a charset, so decoding presumably follows
// the platform default on older JDKs -- confirm the input encoding.
File file = new File(moviesPath);
FileReader fr = new FileReader(file);
BufferedReader br = new BufferedReader(fr);

// Output file that getRecommendationsForUser later loads as its data model.
File csvFile = new File("movies.csv");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

read these values from a property file to make your code more flexible and maintainable

FileWriter fw = new FileWriter(csvFile);

String line;

String score = null;
String productId = null;
String userId = null;

// Each matching line is split on a single space and the second token is taken as the value.
// NOTE(review): a matching line without a space would make split(" ")[1] throw
// ArrayIndexOutOfBoundsException, which is not caught below.
while((line = br.readLine()) != null){
if (line.startsWith("review/score")) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use constants to avoid magic numbers and repeated string

score = line.split(" ")[1];
totalReviews++;
}
else if (line.startsWith("product/productId")) {
productId = line.split(" ")[1];
if (!products.containsKey(productId)) {
totalProducts++;
products.put(productId, totalProducts);
}
}
else if (line.startsWith("review/userId")) {
userId = line.split(" ")[1];
if (!users.containsKey(userId)) {
totalUsers++;
users.put(userId, totalUsers);
}
}
// Once all three fields of a review are known, emit its CSV row and start over.
if (productId != null && userId != null && score != null) {

StringBuilder row = new StringBuilder();
row.append(String.valueOf(users.get(userId)) + ","
+ String.valueOf(products.get(productId)) + ","
+ score);
row.append("\n");
fw.write(row.toString());

productId = null;
userId = null;
score = null;
}
}
// NOTE(review): these close() calls are skipped if an exception is thrown above, and br is
// never closed directly -- try-with-resources would cover both.
fr.close();
fw.close();
}
catch(IOException e){
// The failure is only reported; the object is still constructed with whatever was parsed so far.
e.printStackTrace();
}
}

/**
 * Returns the number of reviews counted while parsing the input file.
 *
 * @return the current value of the review counter
 */
public int getTotalReviews() {
    return this.totalReviews;
}

/**
 * Returns the number of distinct products counted while parsing the input file.
 *
 * @return the current value of the product counter
 */
public int getTotalProducts() {
    return this.totalProducts;
}

/**
 * Returns the number of distinct users counted while parsing the input file.
 *
 * @return the current value of the user counter
 */
public int getTotalUsers() {
    return this.totalUsers;
}

/**
 * Recommends up to three products for the given user.
 *
 * <p>The data model is loaded from the {@code movies.csv} file, a Pearson-correlation
 * user similarity with a 0.1 threshold neighborhood is built on it, and the numeric item
 * ids returned by the recommender are translated back to product id strings.
 *
 * @param userId the user id string as it appears in the parsed input file
 * @return the recommended product ids; an empty list when {@code userId} is {@code null},
 *         when the user was never seen while parsing, or when loading the model or
 *         computing the recommendation fails
 */
public List<String> getRecommendationsForUser(String userId) {

    // Hashtable.get rejects null keys, so a null id is answered before the lookup.
    if (userId == null) {
        return new ArrayList<>();
    }

    // An unknown user has no numeric id; unboxing the null Integer for recommend(long, int)
    // would otherwise throw a NullPointerException that the catch below does not handle.
    Integer userIndex = users.get(userId);
    if (userIndex == null) {
        return new ArrayList<>();
    }

    List<String> recommendations = new ArrayList<>();

    try {
        File datasetFile = new File("movies.csv");
        DataModel model = new FileDataModel(datasetFile);
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
        UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
        UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);

        recommendations = recommender.recommend(userIndex, 3)
                .stream()
                // Item ids were assigned from an int counter, so the narrowing cast is lossless here.
                .map(item -> products.getKey((int) item.getItemID()))
                .collect(Collectors.toList());
    }
    catch (IOException | TasteException e) {
        // Best effort: report the failure and fall through to the empty result.
        e.printStackTrace();
    }
    return recommendations;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use meaningful names for the tests — names that state the intention of each test and what you are trying to show — and give the assertions more context. A test not only proves that your code works; it also serves as 'live' documentation.

//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
MovieRecommender recommender = new MovieRecommender("./movies.txt");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand Down