mlverse · Prateek0xeo · Jan 11, 2025 · Jan 13, 2025 · Jan 13, 2025 · Jan 17, 2025
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -53,7 +53,7 @@ jobs:
 
       - name: Upload check results
         if: failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
           path: check
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -12,6 +12,11 @@ Authors@R: c(
            role = c("ctb"),
            email = "[email protected]"
            ),
+    person(given = "Prateek",
+           family = "Kumar",
+           role = c("ctb"),
+           email = "[email protected]"
+           ),
     person(family = "RStudio", role = c("cph"))
     )
 Description: Provides access to datasets, models and preprocessing
@@ -22,7 +27,7 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 URL: https://torchvision.mlverse.org, https://github.com/mlverse/torchvision
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Imports:
     torch (>= 0.5.0),
     fs,

diff --git a/NAMESPACE b/NAMESPACE
@@ -66,6 +66,7 @@ export(cifar10_dataset)
 export(draw_bounding_boxes)
 export(draw_keypoints)
 export(draw_segmentation_masks)
+export(eurosat_dataset)
 export(image_folder_dataset)
 export(kmnist_dataset)
 export(magick_loader)

diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,7 @@
 - `tensor_image_display` and `tensor_image_browse` now accept all tensor_image dtypes. (#115, @cregouby) 
 - fix `transform_affine` help to remove confusion with `transforme_random_affine` help (#116, @cregouby)
 - add message translation in french (#112, @cregouby)
+- added `eurosat_dataset()` to download and load the EuroSAT dataset (#122, @Prateek0xeo)
 
 # torchvision 0.6.0
 

diff --git a/R/dataset-eurosat.R b/R/dataset-eurosat.R
@@ -0,0 +1,142 @@
+#' EuroSAT Dataset Loader (via Hugging Face)
+#'
+#' Downloads and loads the EuroSAT dataset from the Hugging Face
+#' `torchgeo/eurosat` repository. The dataset consists of Sentinel-2
+#' satellite images organized into 10 classes.
+#'
+#' @param root Character. The root directory where the dataset will be stored.
+#' @param split Character. One of `train`, `val`, or `test`.
+#' @param download Logical. If `TRUE`, downloads the archive and the split file
+#'   into `root` if they are not already present.
+#' @param transform Function. Optional transformation to be applied to the images.
+#' @param target_transform Function. Optional transformation to be applied to the labels.
+#'
+#' @return An R6 dataset object that inherits from `torch::dataset`. Each item is
+#'   a list with `x`, the image read by `jpeg::readJPEG()` (after `transform`),
+#'   and `y`, a long tensor holding the zero-based class index (after
+#'   `target_transform`).
+#'
+#' @examples
+#' \dontrun{
+#' # Initialize the dataset
+#' ds <- eurosat_dataset(root = "./data/eurosat", split = "train", download = TRUE)
+#'
+#' # Access the first sample
+#' sample <- ds[1]
+#' print(sample$x) # Image
+#' print(sample$y) # Label
+#' }
+#' @export
+eurosat_dataset <- torch::dataset(
+  name = "eurosat",
+
+  # Checks `split`, optionally downloads the data, then reads the image file
+  # names listed for the requested split.
+  initialize = function(root,
+                        split = "train",
+                        download = FALSE,
+                        transform = NULL,
+                        target_transform = NULL) {
+    self$root <- normalizePath(root, mustWork = FALSE)
+    self$split <- split
+    self$transform <- transform
+    self$target_transform <- target_transform
+
+    if (!split %in% c("train", "val", "test")) {
+      runtime_error("Invalid split. Must be one of 'train', 'val', or 'test'.")
+    }
+
+    self$zip_file <- file.path(self$root, "EuroSAT.zip")
+    self$images_dir <- file.path(self$root, "images")
+    self$split_file <- file.path(self$root, glue::glue("eurosat-{split}.txt"))
+
+    if (download) {
+      self$download()
+    }
+
+    if (!file.exists(self$split_file)) {
+      runtime_error(glue::glue("Split file not found for split='{split}'."))
+    }
+
+    # One image file name per line; `warn = FALSE` tolerates a missing final EOL.
+    self$data <- readLines(self$split_file, warn = FALSE)
+    self$load_meta()
+  },
+
+  # Derives the class names from the file-name prefix before the first "_".
+  load_meta = function() {
+    # Sorted so the class -> index mapping does not depend on the order in which
+    # file names appear in the split file.
+    # NOTE(review): indices are zero-based; confirm downstream code expects that.
+    self$classes <- sort(unique(sub("_.*", "", self$data)), method = "radix")
+    self$class_to_idx <- setNames(seq_along(self$classes) - 1, self$classes)
+  },
+
+  # Fetches the archive and the split file into `root`, skipping anything that
+  # is already on disk, and unpacks the archive into `images_dir`.
+  download = function() {
+    dir.create(self$root, recursive = TRUE, showWarnings = FALSE)
+
+    if (!file.exists(self$zip_file) || file.size(self$zip_file) == 0) {
+      zip_url <- "https://huggingface.co/datasets/torchgeo/eurosat/resolve/main/EuroSAT.zip?download=true"
+      rlang::inform("Downloading dataset...")
+      utils::download.file(url = zip_url, destfile = self$zip_file, mode = "wb")
+      rlang::inform("Download complete.")
+    }
+
+    if (!dir.exists(self$images_dir)) {
+      rlang::inform("Extracting dataset...")
+      utils::unzip(self$zip_file, exdir = self$images_dir)
+      rlang::inform("Extraction finished.")
+    }
+
+    # Download the split-specific text file unless a non-empty copy exists.
+    if (!file.exists(self$split_file) || file.size(self$split_file) == 0) {
+      txt_url <- glue::glue(
+        "https://huggingface.co/datasets/torchgeo/eurosat/resolve/main/eurosat-{self$split}.txt?download=true"
+      )
+      rlang::inform("Downloading split file...")
+      utils::download.file(url = txt_url, destfile = self$split_file, mode = "wb")
+    }
+    if (file.size(self$split_file) == 0) {
+      runtime_error(glue::glue("Downloaded split file is empty: {self$split_file}"))
+    }
+  },
+
+  # Returns `list(x, y)` for the `index`-th file name of the split.
+  .getitem = function(index) {
+    filename <- self$data[index]
+    label <- sub("_.*", "", filename)
+
+    # Images are looked up under <images_dir>/2750/<class>/<file name>.
+    image_path <- file.path(self$images_dir, "2750", label, filename)
+    if (!file.exists(image_path)) {
+      runtime_error(glue::glue("Image file not found: {image_path}"))
+    }
+
+    img_array <- jpeg::readJPEG(image_path)
+    if (!is.null(self$transform)) {
+      img_array <- self$transform(img_array)
+    }
+
+    if (!label %in% names(self$class_to_idx)) {
+      runtime_error(glue::glue("Label not found in class_to_idx: {label}"))
+    }
+
+    label_idx <- torch::torch_tensor(
+      as.integer(self$class_to_idx[[label]]),
+      dtype = torch::torch_long()
+    )$squeeze()
+    # Apply the optional label transform, mirroring `transform` above.
+    if (!is.null(self$target_transform)) {
+      label_idx <- self$target_transform(label_idx)
+    }
+
+    list(x = img_array, y = label_idx)
+  },
+
+  # Number of image file names listed in the split file.
+  .length = function() {
+    length(self$data)
+  }
+)
diff --git a/inst/po/fr/LC_MESSAGES/R-torchvision.mo b/inst/po/fr/LC_MESSAGES/R-torchvision.mo
diff --git a/man/eurosat_dataset.Rd b/man/eurosat_dataset.Rd
diff --git a/po/R-fr.po b/po/R-fr.po
@@ -1,15 +1,15 @@
 msgid ""
 msgstr ""
 "Project-Id-Version: torchvision 0.6.0.9000\n"
-"POT-Creation-Date: 2024-08-23 09:42+0200\n"
-"PO-Revision-Date: 2024-08-23 09:49+0200\n"
-"Last-Translator: \n"
+"POT-Creation-Date: 2025-01-30 08:42+0100\n"
+"PO-Revision-Date: 2025-01-30 08:58+0100\n"
+"Last-Translator: Christophe Regouby <[email protected]>\n"
 "Language-Team: \n"
 "Language: fr\n"
 "MIME-Version: 1.0\n"
 "Content-Type: text/plain; charset=UTF-8\n"
 "Content-Transfer-Encoding: 8bit\n"
-"X-Generator: Poedit 3.4.3\n"
+"X-Generator: Poedit 3.0.1\n"
 
 #: conditions.R:2
 msgid "type_error"
@@ -35,6 +35,43 @@ msgstr "Fichier introuvable. Veuillez utiliser `download = TRUE`"
 msgid "Corrupt file! Delete the file in {p} and try again."
 msgstr "Fichier corrompu. Supprimez le fichier {p} et recommencez."
 
+#: dataset-eurosat.R:39
+msgid "Invalid split. Must be one of 'train', 'val', or 'test'."
+msgstr ""
+"`split` invalide. Il doit être choisi parmi `train`, `val`, ou `test`."
+
+#: dataset-eurosat.R:51
+msgid "Split file not found for split='%s'."
+msgstr "Fichier de split introuvable pour `split=`'%s'."
+
+#: dataset-eurosat.R:62
+msgid "Downloading EuroSAT ZIP..."
+msgstr "Téléchargement du fichier zip `EuroSAT`..."
+
+#: dataset-eurosat.R:64
+msgid "EuroSAT ZIP downloaded."
+msgstr "`EuroSAT` zip téléchargé."
+
+#: dataset-eurosat.R:68
+msgid "Extracting EuroSAT ZIP..."
+msgstr "Extraction d' `EuroSAT.zip`."
+
+#: dataset-eurosat.R:70
+msgid "Extraction complete."
+msgstr "Extraction réussie."
+
+#: dataset-eurosat.R:78
+msgid "Downloading split text file:"
+msgstr "Téléchargement du fichier texte de `split` :"
+
+#: dataset-eurosat.R:81
+msgid "Downloaded split file is empty:"
+msgstr "Le fichier de `split` est vide :"
+
+#: dataset-eurosat.R:91
+msgid "Image file not found:"
+msgstr "Fichier d'image introuvable :"
+
 #: dataset-mnist.R:44
 msgid "Dataset not found. You can use `download = TRUE` to download it."
 msgstr ""
@@ -73,23 +110,6 @@ msgstr "Les extensions supportées sont {paste(extensions, collapse=',')}"
 msgid "unknown extension {ext} in path {path}"
 msgstr "Extension de fichier {ext} inconnue pour {path}."
 
-#: models-efficientnet.R:29 models-resnet.R:34
-msgid "basic_block only supports groups=1 and base_width=64"
-msgstr ""
-"`basic_block` n'est compatible qu'avec `groups = 1` et `base_width = 64`."
-
-#: models-efficientnet.R:32 models-resnet.R:37
-msgid "Dilation > 1 not supported in basic_block"
-msgstr "`dilation > 1` non compatible avec le `basic_block`"
-
-#: models-efficientnet.R:140 models-resnet.R:145
-msgid "replace_stride_with_dilation should be NULL"
-msgstr "`replace_stride_with_dilation` doit être `NULL`"
-
-#: models-efficientnet.R:141 models-resnet.R:146
-msgid "or a 3-element tuple, got {length(replace_stride_with_dilation)}"
-msgstr "ou un triplet. Ici c'est {lenght(replace_stride_with_dilation)}"
-
 #: models-inception.R:16
 msgid ""
 "The default weight initialization of inception_v3 will be changed in future "
@@ -130,6 +150,23 @@ msgstr ""
 "ou une liste de vecteurs, chacun de longeur 4. Ici, c'est "
 "{inverted_residual_setting}"
 
+#: models-resnet.R:34
+msgid "basic_block only supports groups=1 and base_width=64"
+msgstr ""
+"`basic_block` n'est compatible qu'avec `groups = 1` et `base_width = 64`."
+
+#: models-resnet.R:37
+msgid "Dilation > 1 not supported in basic_block"
+msgstr "`dilation > 1` non compatible avec le `basic_block`"
+
+#: models-resnet.R:145
+msgid "replace_stride_with_dilation should be NULL"
+msgstr "`replace_stride_with_dilation` doit être `NULL`"
+
+#: models-resnet.R:146
+msgid "or a 3-element tuple, got {length(replace_stride_with_dilation)}"
+msgstr "ou un triplet. Ici c'est {length(replace_stride_with_dilation)}"
+
 #: tiny-imagenet-dataset.R:43
 msgid "Downloading tiny imagenet dataset!"
 msgstr "Téléchargement d'un mini jeu de données imagenet en cours!"
@@ -279,18 +316,22 @@ msgstr ""
 msgid "gamma must be non-negative"
 msgstr "`gamma` doit être positif."
 
-#: vision_utils.R:115
+#: vision_utils.R:105 vision_utils.R:204 vision_utils.R:276
+msgid "magick"
+msgstr ""
+
+#: vision_utils.R:117
 msgid "boxes doesn't contain any box. No box was drawn"
 msgstr "`boxes` ne contient aucun cadre. Aucun cadre n'a été tracé."
 
-#: vision_utils.R:120
+#: vision_utils.R:122
 msgid ""
 "Number of labels {length(labels)} cannot be broadcasted on number of boxes "
 "{num_boxes}"
 msgstr ""
 "Le nombre de labels ({length(labels)}) n'est pas un multiple du nombre de "
 "cadres ({num_boxes}) et ne peut pas être broadcasté."
 
-#: vision_utils.R:214
+#: vision_utils.R:217
 msgid "masks doesn't contain any mask. No mask was drawn"
 msgstr "`masks` ne contient pas de masque. Aucun masque n'a été tracé."