From 5f1042b4c1bca5260f45f5df375bb971ba730e90 Mon Sep 17 00:00:00 2001 From: Oleg Date: Sun, 16 Apr 2023 10:56:36 +0500 Subject: [PATCH 1/2] fix train_only_size --- implicit/evaluation.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/implicit/evaluation.pyx b/implicit/evaluation.pyx index f25d0da..c036336 100644 --- a/implicit/evaluation.pyx +++ b/implicit/evaluation.pyx @@ -187,16 +187,15 @@ cpdef leave_k_out_split( # get only users with n + 1 interactions candidate_mask = counts > K + 1 + unique_candidate_users = unique_users[candidate_mask] # keep a given subset of users _only_ in the training set. if train_only_size > 0.0: - train_only_mask = ~np.isin( - unique_users, _choose(random_state, len(unique_users), train_only_size) - ) - candidate_mask = train_only_mask & candidate_mask + adjusted_ratio = min(1, (1 - train_only_size) / (unique_candidate_users.shape[0] / unique_users.shape[0])) + train_only_mask = _choose(random_state, len(unique_candidate_users), adjusted_ratio) + unique_candidate_users = unique_candidate_users[train_only_mask] # get unique users who appear in the test set - unique_candidate_users = unique_users[candidate_mask] full_candidate_mask = np.isin(users, unique_candidate_users) # get all users, items and ratings that match specified requirements to be From 7d3585203bafe4ed3d6a4e10641bfb046b70f893 Mon Sep 17 00:00:00 2001 From: Oleg Date: Fri, 21 Apr 2023 07:09:49 +0500 Subject: [PATCH 2/2] fix zero-candidate case --- implicit/evaluation.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/implicit/evaluation.pyx b/implicit/evaluation.pyx index c036336..e029520 100644 --- a/implicit/evaluation.pyx +++ b/implicit/evaluation.pyx @@ -187,11 +187,13 @@ cpdef leave_k_out_split( # get only users with n + 1 interactions candidate_mask = counts > K + 1 + if sum(candidate_mask) == 0: + return ratings.tocsr(), csr_matrix(ratings.shape) unique_candidate_users = unique_users[candidate_mask] # keep a given subset of users _only_ in the training set. if train_only_size > 0.0: - adjusted_ratio = min(1, (1 - train_only_size) / (unique_candidate_users.shape[0] / unique_users.shape[0])) + adjusted_ratio = min(1, (1 - train_only_size) / (unique_candidate_users.shape[0] / (unique_users.shape[0] + 1))) train_only_mask = _choose(random_state, len(unique_candidate_users), adjusted_ratio) unique_candidate_users = unique_candidate_users[train_only_mask]