Commit 58ee896

ADD: self-attention model (#281)
1 parent e10c08a commit 58ee896

17 files changed: +1655 -72 lines changed

README.md

Lines changed: 5 additions & 3 deletions
@@ -122,6 +122,7 @@ Here are two tutorials published on Medium that can help you
 | Shopper | [![alt text](docs/illustrations/logos/jupyter_logo.png)](notebooks/basket_models/shopper.ipynb)   [![Open In Colab](https://img.shields.io/badge/-grey?logo=googlecolab)](https://colab.research.google.com/github/artefactory/choice-learn/blob/main/notebooks/basket_models/shopper.ipynb) | Ruiz et al. [[16]](#trident-references)           | *Shopper* | [#](https://artefactory.github.io/choice-learn/references/basket_models/references_shopper/) |
 | Alea Carta | [![alt text](docs/illustrations/logos/jupyter_logo.png)](notebooks/basket_models/alea_carta.ipynb)   [![Open In Colab](https://img.shields.io/badge/-grey?logo=googlecolab)](https://colab.research.google.com/github/artefactory/choice-learn/blob/main/notebooks/basket_models/alea_carta.ipynb) | Désir et al. [[17]](#trident-references) | *AleaCarta* | [#](https://artefactory.github.io/choice-learn/references/basket_models/references_alea_carta/) |
 | Base Attention | [![alt text](docs/illustrations/logos/jupyter_logo.png)](notebooks/basket_models/basic_attention.ipynb)   [![Open In Colab](https://img.shields.io/badge/-grey?logo=googlecolab)](https://colab.research.google.com/github/artefactory/choice-learn/blob/main/notebooks/basket_models/basic_attention.ipynb) | Wang et al. [[18]](#trident-references) | *AttentionBasedContextEmbedding* | [#]() |
+| Self Attention | [![alt text](docs/illustrations/logos/jupyter_logo.png)](notebooks/basket_models/self_attention.ipynb)   [![Open In Colab](https://img.shields.io/badge/-grey?logo=googlecolab)](https://colab.research.google.com/github/artefactory/choice-learn/blob/main/notebooks/basket_models/self_attention.ipynb) | Zhang et al. [[20]](#trident-references) | *SelfAttentionModel* | [#]() |
 
 
 ### Data
@@ -377,8 +378,8 @@ The use of this software is under the MIT license, with no limitation of usage,
 [16] [SHOPPER: A Probabilistic Model of Consumer Choice with Substitutes and Complements](https://arxiv.org/abs/1711.03560), Ruiz, F. J. R.; Athey, S.; Blei, D. M. (2019)\
 [17] [Better Capturing Interactions between Products in Retail: Revisited Negative Sampling for Basket Choice Modeling](https://ojs.aaai.org/index.php/AAAI/article/view/11851), Désir, J.; Auriau, V.; Možina, M.; Malherbe, E. (2025), ECML PKDD\
 [18] [Attention-based Transactional Context Embedding for Next-Item Recommendation](https://ojs.aaai.org/index.php/AAAI/article/view/11851), Wang, S.; Hu, L.; Cao, L.; Huang, X.; Lian, D.; Liu, W. (2018)\
-[19] [A Discrete Choice Model for Subset Selection.](https://www.cs.cornell.edu/~arb/papers/higher-order-choice-wsdm-2018.pdf), Benson, A.; Kumar, R.; Tomkins, A. (2018)
-
+[19] [A Discrete Choice Model for Subset Selection.](https://www.cs.cornell.edu/~arb/papers/higher-order-choice-wsdm-2018.pdf), Benson, A.; Kumar, R.; Tomkins, A. (2018)\
+[20] [Next Item Recommendation with Self-Attention.](https://recnlp2019.github.io/papers/RecNLP2019_paper_21.pdf), Zhang, S.; Yao, L.; Tay, Y.; Sun, A. (2018)\
 ### Code and Repositories
 
 *Official models implementations:*
@@ -388,4 +389,5 @@ The use of this software is under the MIT license, with no limitation of usage,
 [12] [ResLogit](https://github.com/LiTrans/reslogit)\
 [13] [Learning-MNL](https://github.com/BSifringer/EnhancedDCM)\
 [16] [Shopper](https://github.com/franrruiz/shopper-src)\
-[17] [AleaCarta](https://github.com/artefactory/alea-carta-est)
+[17] [AleaCarta](https://github.com/artefactory/alea-carta-est)\
+[20] [SelfAttention](https://github.com/artefactory/rd-self-attentive)
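
The new README row points to notebooks/basket_models/self_attention.ipynb and the *SelfAttentionModel* class. As a quick orientation, here is a hypothetical usage sketch: the class name comes from the table above, but the import path, constructor arguments, and fit call are assumptions rather than the confirmed API (the notebook is the reference walkthrough).

```python
# Hypothetical sketch only: the import path and every argument are assumptions,
# mirroring the other basket models (Shopper, AleaCarta) in choice-learn.
from choice_learn.basket_models import SelfAttentionModel  # assumed export path

model = SelfAttentionModel(
    latent_sizes={"preferences": 8},  # assumed embedding size, as in other basket models
    n_negative_samples=2,             # assumed negative-sampling setting
    lr=1e-3,                          # assumed optimizer settings
    epochs=50,
)

# `trip_dataset` stands for a choice-learn basket dataset; the fit call is assumed:
# history = model.fit(trip_dataset)
```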

choice_learn/basket_models/alea_carta.py

Lines changed: 17 additions & 5 deletions
@@ -34,6 +34,7 @@ def __init__(
         weight_decay: Union[float, None] = None,
         momentum: float = 0.0,
         epsilon_price: float = 1e-5,
+        l2_regularization: float = 0.0,
         **kwargs,
     ) -> None:
         """Initialize the AleaCarta model.
@@ -79,6 +80,7 @@ def __init__(
         self.item_intercept = item_intercept
         self.price_effects = price_effects
         self.seasonal_effects = seasonal_effects
+        self.l2_regularization = l2_regularization
 
         if "preferences" not in latent_sizes.keys():
             logging.warning(
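
The coefficient stored here is what later scales the ridge penalty added to the loss in compute_batch_loss (last hunk below). A hedged instantiation sketch, where the import path and the remaining argument values are illustrative rather than taken from this commit:

```python
# Sketch: enabling the new ridge penalty on an AleaCarta model.
# The export path is assumed; weight_decay, momentum and epsilon_price appear in
# the signature above, and their values here are only illustrative defaults.
from choice_learn.basket_models import AleaCarta  # assumed export path

model = AleaCarta(
    weight_decay=None,
    momentum=0.0,
    epsilon_price=1e-5,
    l2_regularization=0.01,  # new argument: strength of the L2 penalty on trainable weights
)
```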
@@ -265,6 +267,7 @@ def compute_batch_utility(
         week_batch: np.ndarray,
         price_batch: np.ndarray,
         available_item_batch: np.ndarray,
+        user_batch: Union[np.ndarray, tf.Tensor],
     ) -> tf.Tensor:
         """Compute the utility of all the items in item_batch given the items in basket_batch.
 
@@ -297,6 +300,7 @@ def compute_batch_utility(
             Utility of all the items in item_batch
             Shape must be (batch_size,)
         """
+        _ = user_batch
         _ = available_item_batch
         item_batch = tf.cast(item_batch, dtype=tf.int32)
         if len(tf.shape(item_batch)) == 1:
@@ -448,7 +452,6 @@ def compute_basket_utility(
             [np.delete(basket, i) for i in range(len_basket)]
         ) # Shape: (len_basket, len(basket) - 1)
 
-        # Basket utility = sum of the utilities of the items in the basket
         return tf.reduce_sum(
             self.compute_batch_utility(
                 item_batch=basket,
@@ -457,6 +460,7 @@ def compute_basket_utility(
                 week_batch=np.array([week] * len_basket),
                 price_batch=prices,
                 available_item_batch=available_item_batch,
+                user_batch=None,
             )
         ).numpy()
 
@@ -536,6 +540,7 @@ def compute_batch_loss(
         week_batch: np.ndarray,
         price_batch: np.ndarray,
         available_item_batch: np.ndarray,
+        user_batch: np.ndarray,
     ) -> tuple[tf.Variable]:
         """Compute log-likelihood and loss for one batch of items.
 
@@ -576,6 +581,7 @@ def compute_batch_loss(
             Approximated by difference of utilities between positive and negative samples
             Shape must be (1,)
         """
+        _ = user_batch
         _ = future_batch
         batch_size = len(item_batch)
         item_batch = tf.cast(item_batch, dtype=tf.int32)
@@ -593,7 +599,6 @@ def compute_batch_loss(
             ],
             axis=0,
         )
-
         augmented_item_batch = tf.cast(
             tf.concat([tf.expand_dims(item_batch, axis=-1), negative_samples], axis=1),
             dtype=tf.int32,
@@ -612,7 +617,8 @@ def compute_batch_loss(
             week_batch=week_batch,
             price_batch=augmented_price_batch,
             available_item_batch=available_item_batch,
-        )
+            user_batch=None,
+        ) # Shape: (batch_size * (n_negative_samples + 1),)
 
         positive_samples_utilities = tf.gather(params=all_utilities, indices=[0], axis=1)
         negative_samples_utilities = tf.gather(
@@ -645,6 +651,12 @@ def compute_batch_loss(
             ),
             output=tf.nn.sigmoid(all_utilities),
         ) # Shape: (batch_size * (n_negative_samples + 1),)
-
+        ridge_regularization = self.l2_regularization * tf.add_n(
+            [tf.nn.l2_loss(weight) for weight in self.trainable_weights]
+        )
         # Normalize by the batch size and the number of negative samples
-        return tf.reduce_sum(bce) / (batch_size * self.n_negative_samples), loglikelihood
+        return (
+            tf.reduce_sum(bce + ridge_regularization)
+            / (batch_size * (self.n_negative_samples + 1)),
+            loglikelihood,
+        )
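
To make the new loss term concrete, the sketch below reproduces the ridge computation outside the model class: tf.nn.l2_loss sums half the squared entries of each weight tensor, the penalties are added up and scaled by l2_regularization, and the result is combined with the per-sample binary cross-entropy before normalizing by batch_size * (n_negative_samples + 1). The tensors stand in for self.trainable_weights and bce and are illustrative only.

```python
# Standalone illustration of the ridge penalty introduced in compute_batch_loss.
import tensorflow as tf

l2_regularization = 0.01
# Stand-ins for self.trainable_weights (illustrative shapes and values).
weights = [tf.Variable(tf.random.normal((4, 3))), tf.Variable(tf.random.normal((3,)))]

# tf.nn.l2_loss(w) = sum(w ** 2) / 2 for each weight tensor.
ridge_regularization = l2_regularization * tf.add_n(
    [tf.nn.l2_loss(w) for w in weights]
)

# Stand-in for the per-sample binary cross-entropy over positive and negative samples.
batch_size, n_negative_samples = 2, 2
bce = tf.random.uniform((batch_size * (n_negative_samples + 1),))

loss = tf.reduce_sum(bce + ridge_regularization) / (batch_size * (n_negative_samples + 1))
```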
