diff --git a/pom.xml b/pom.xml index 8169ff7..bb17378 100644 --- a/pom.xml +++ b/pom.xml @@ -6,12 +6,27 @@ big-data 1.0-SNAPSHOT jar + + + + 3.1 + org.apache.maven.plugins + maven-compiler-plugin + + 17 + 17 + + + + big-data http://maven.apache.org UTF-8 + 17 + 17 @@ -26,5 +41,10 @@ 4.7 test + + org.apache.commons + commons-collections4 + 4.4 + diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..f1ec722 --- /dev/null +++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,121 @@ +package nearsoft.academy.bigdata.recommendation; + +import java.io.*; +import java.util.*; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.collections4.BidiMap; +import org.apache.commons.collections4.bidimap.DualHashBidiMap; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +public class MovieRecommender { + + private int totalReviews = 0; + private int totalProducts = 0; + private int totalUsers = 0; + + private Hashtable users = new Hashtable<>(); + private BidiMap products = new DualHashBidiMap<>(); + + public MovieRecommender(String moviesPath) { + try { + + File file = new File(moviesPath); + FileReader fr = new FileReader(file); + BufferedReader br = new BufferedReader(fr); + + File csvFile = new File("movies.csv"); + FileWriter fw = new FileWriter(csvFile); + + String line; + + String score = null; + String productId = null; + String userId = null; + + while((line = br.readLine()) != null){ + if (line.startsWith("review/score")) { + score = line.split(" ")[1]; + totalReviews++; + } + else if (line.startsWith("product/productId")) { + productId = line.split(" ")[1]; + if (!products.containsKey(productId)) { + totalProducts++; + products.put(productId, totalProducts); + } + } + else if (line.startsWith("review/userId")) { + userId = line.split(" ")[1]; + if (!users.containsKey(userId)) { + totalUsers++; + users.put(userId, totalUsers); + } + } + if (productId != null && userId != null && score != null) { + + StringBuilder row = new StringBuilder(); + row.append(String.valueOf(users.get(userId)) + "," + + String.valueOf(products.get(productId)) + "," + + score); + row.append("\n"); + fw.write(row.toString()); + + productId = null; + userId = null; + score = null; + } + } + fr.close(); + fw.close(); + } + catch(IOException e){ + e.printStackTrace(); + } + } + + public int getTotalReviews(){ + return totalReviews; + } + + public int getTotalProducts(){ + return totalProducts; + } + + public int getTotalUsers(){ + return totalUsers; + } + + public List getRecommendationsForUser(String userId) { + + List recommendations = new ArrayList<>(); + + try { + File datasetFile = new File("movies.csv"); + DataModel model = new FileDataModel(datasetFile); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + recommendations = recommender.recommend(users.get(userId), 3) + .stream() + .map(item -> products.getKey((int) item.getItemID())) + .collect(Collectors.toList()); + } + catch (IOException | TasteException e) { + e.printStackTrace(); + } + return recommendations; + } +} \ No newline at end of file diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..cdfeade 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("./movies.txt"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers());