From 45df71b7256fc50ffa6a2c55c2c72e961d3d675b Mon Sep 17 00:00:00 2001
From: Ooberaj
Date: Wed, 30 Mar 2022 00:03:37 -0700
Subject: [PATCH] Add carrot_weeds_uk and onions_weeds_uk datasets

---
 agml/_assets/public_datasources.json | 42 +++++++++++++++
 agml/_assets/source_citations.json   |  8 +++
 agml/_internal/preprocess.py         | 79 ++++++++++++++++++++++++++--
 agml/_internal/process_utils.py      | 28 ++++
 4 files changed, 154 insertions(+), 3 deletions(-)

diff --git a/agml/_assets/public_datasources.json b/agml/_assets/public_datasources.json
index fa31ef876..45176080b 100644
--- a/agml/_assets/public_datasources.json
+++ b/agml/_assets/public_datasources.json
@@ -919,5 +919,47 @@
         0.22038120031356812
       ]
     }
+  },
+  "carrot_weeds_uk": {
+    "ml_task": "semantic_segmentation",
+    "ag_task": "weed_segmentation",
+    "location": {
+      "continent": "europe",
+      "country": "united kingdom"
+    },
+    "sensor_modality": "multispectral",
+    "real_synthetic": "real",
+    "platform": "ground_mobile",
+    "input_data_format": "png",
+    "annotation_format": "image",
+    "n_images": "20",
+    "docs_url": "https://lcas.lincoln.ac.uk/wp/research/data-sets-software/crop-vs-weed-discrimination-dataset/",
+    "classes": {
+      "0": "carrot",
+      "1": "weeds",
+      "2": "non-vegetation"
+    },
+    "external_image_sources": ["ndvi-images", "nir-images"]
+  },
+  "onions_weeds_uk": {
+    "ml_task": "semantic_segmentation",
+    "ag_task": "weed_segmentation",
+    "location": {
+      "continent": "europe",
+      "country": "united kingdom"
+    },
+    "sensor_modality": "multispectral",
+    "real_synthetic": "real",
+    "platform": "ground_mobile",
+    "input_data_format": "png",
+    "annotation_format": "image",
+    "n_images": "20",
+    "docs_url": "https://lcas.lincoln.ac.uk/wp/research/data-sets-software/crop-vs-weed-discrimination-dataset/",
+    "classes": {
+      "0": "onion",
+      "1": "weeds",
+      "2": "non-vegetation"
+    },
+    "external_image_sources": ["ndvi-images", "nir-images"]
   }
 }
\ No newline at end of file
diff --git a/agml/_assets/source_citations.json b/agml/_assets/source_citations.json
index d13ba9433..9d0fc27da 100644
--- a/agml/_assets/source_citations.json
+++ b/agml/_assets/source_citations.json
@@ -102,5 +102,13 @@
   "plant_doc_detection": {
     "license": "CC BY-SA 4.0",
     "citation": "@inproceedings{10.1145/3371158.3371196,\n author = {Singh, Davinder and Jain, Naman and Jain, Pranjali and Kayal, Pratik and Kumawat, Sudhakar and Batra, Nipun},\n title = {PlantDoc: A Dataset for Visual Plant Disease Detection},\n year = {2020},\n isbn = {9781450377386},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3371158.3371196},\n doi = {10.1145/3371158.3371196},\n booktitle = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD},\n pages = {249–253},\n numpages = {5},\n keywords = {Deep Learning, Object Detection, Image Classification},\n location = {Hyderabad, India},\n series = {CoDS COMAD 2020}\n }"
+  },
+  "carrot_weeds_uk": {
+    "license": "CC BY-NC-SA 3.0",
+    "citation": "@article{bosilj2019transfer,\n author = {Bosilj, Petra and Aptoula, Erchan and Duckett, Tom and Cielniak, Grzegorz},\n title = {Transfer learning between crop types for semantic segmentation of crops versus weeds in precision agriculture},\n journal = {Journal of Field Robotics},\n year = 2019,\n volume = {to be determined (published online)}\n }"
+  },
+  "onions_weeds_uk": {
+    "license": "CC BY-NC-SA 3.0",
+    "citation": "@article{bosilj2019transfer,\n author = {Bosilj, Petra and Aptoula, Erchan and Duckett, Tom and Cielniak, Grzegorz},\n title = {Transfer learning between crop types for semantic segmentation of crops versus weeds in precision agriculture},\n journal = {Journal of Field Robotics},\n year = 2019,\n volume = {to be determined (published online)}\n }"
   }
 }
\ No newline at end of file
diff --git a/agml/_internal/preprocess.py b/agml/_internal/preprocess.py
index 91b6e291a..9f4e83fde 100644
--- a/agml/_internal/preprocess.py
+++ b/agml/_internal/preprocess.py
@@ -41,7 +41,7 @@
     read_txt_file, get_image_info, get_label2id, convert_bbox_to_coco,
     get_coco_annotation_from_obj, convert_xmls_to_cocojson,
     mask_annotation_per_bbox, move_segmentation_dataset,
-    create_sub_masks, create_sub_mask_annotation_per_bbox
+    create_sub_masks, create_sub_mask_annotation_per_bbox, rgb2mask
 )
 
 
@@ -816,9 +816,82 @@ def plant_doc_detection(self, dataset_name):
             extract_num_from_imgid=False
         )
 
+    def carrot_weeds_uk(self, dataset_name):
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        rgb_paths, nir_paths, ndvi_paths, rgb_masks = ['images'], ['nir-images'], ['ndvi-images'], []
+        # gather the RGB, NIR, NDVI and mask images per plot; the first entry of each image list is its output folder
+        for root, subdirs, files in list(os.walk(dataset_dir))[1:]:
+            id_ = os.path.basename(root)
+            rgb_paths.append([os.path.join(root, "rgbreg_crop.png"), id_ + ".png"])
+            nir_paths.append([os.path.join(root, "depth_crop.png"), id_ + ".png"])
+            ndvi_paths.append([os.path.join(root, "ndvi_crop.png"), id_ + ".png"])
+            rgb_masks.append([os.path.join(root, "truth_crop.png"), id_ + "mask.png"])
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        os.makedirs(processed_dir, exist_ok=True)
+        processed_annotation_dir = os.path.join(processed_dir, 'annotations')
+        os.makedirs(processed_annotation_dir, exist_ok=True)
+        image_types = [rgb_paths, nir_paths, ndvi_paths]
 
-
-
+        for image_type in image_types:
+            processed_image_dir = os.path.join(processed_dir, image_type[0])
+            os.makedirs(processed_image_dir, exist_ok=True)
+            for image_path in image_type[1:]:
+                shutil.copyfile(image_path[0], os.path.join(processed_image_dir, image_path[1]))
+
+        color2index = {
+            (0, 0, 0): 0,    # black is non-vegetation
+            (0, 0, 255): 1,  # red is carrot
+            (255, 0, 0): 2,  # blue is weed
+        }
+
+        for rgb_mask in rgb_masks:
+            rgb_mask_img = cv2.imread(rgb_mask[0])
+            index_mask = rgb2mask(rgb_mask_img, color2index)
+            anno_out = os.path.join(processed_annotation_dir, rgb_mask[1])
+            cv2.imwrite(anno_out, index_mask)
+
+    def onions_weeds_uk(self, dataset_name):
+        dataset_dir = os.path.join(self.data_original_dir, dataset_name)
+        rgb_paths, nir_paths, ndvi_paths, rgb_masks = ['images'], ['nir-images'], ['ndvi-images'], []
+
+        # gather the RGB, NIR, NDVI and mask images per plot; the first entry of each image list is its output folder
+        for root, subdirs, files in list(os.walk(dataset_dir))[1:]:
+            id_ = os.path.basename(root)
+            for file in files:
+                if file != 'partialc_crop.png' and file != 'truth.png':
+                    img_type = file.split('_')[-2]
+                    if img_type == 'depth':
+                        nir_paths.append([os.path.join(root, file), id_ + ".png"])
+                    elif img_type == 'ndvi':
+                        ndvi_paths.append([os.path.join(root, file), id_ + ".png"])
+                    elif img_type == 'rgbreg':
+                        rgb_paths.append([os.path.join(root, file), id_ + ".png"])
+            rgb_masks.append([os.path.join(root, "truth.png"), id_ + "mask.png"])
+
+        processed_dir = os.path.join(self.data_processed_dir, dataset_name)
+        os.makedirs(processed_dir, exist_ok=True)
+        processed_annotation_dir = os.path.join(processed_dir, 'annotations')
+        os.makedirs(processed_annotation_dir, exist_ok=True)
+
+        image_types = [rgb_paths, nir_paths, ndvi_paths]
+
+        for image_type in image_types:
+            processed_image_dir = os.path.join(processed_dir, image_type[0])
+            os.makedirs(processed_image_dir, exist_ok=True)
+            for image_path in image_type[1:]:
+                shutil.copyfile(image_path[0], os.path.join(processed_image_dir, image_path[1]))
+
+        color2index = {
+            (0, 0, 0): 0,    # black is non-vegetation
+            (0, 0, 255): 1,  # red is onion
+            (255, 0, 0): 2,  # blue is weed
+        }
+
+        for rgb_mask in rgb_masks:
+            rgb_mask_img = cv2.imread(rgb_mask[0])
+            index_mask = rgb2mask(rgb_mask_img, color2index)
+            anno_out = os.path.join(processed_annotation_dir, rgb_mask[1])
+            cv2.imwrite(anno_out, index_mask)
diff --git a/agml/_internal/process_utils.py b/agml/_internal/process_utils.py
index 3dc0154f5..db6653a42 100644
--- a/agml/_internal/process_utils.py
+++ b/agml/_internal/process_utils.py
@@ -568,3 +568,31 @@ def move_segmentation_dataset(
             shutil.copyfile(orig_annotation_path, out_label_path)
         else:
             annotation_preprocess_fn(orig_annotation_path, out_label_path)
+
+def rgb2mask(img, color2index):
+    '''
+    Convert a BGR color mask into a single-channel index mask.
+    Arguments:
+        img: image with 3 channels (BGR, as returned by cv2.imread)
+        color2index: dictionary mapping a (b, g, r) color tuple to its class index
+    Returns:
+        a single-channel mask with the class index assigned to each pixel
+    Source: https://stackoverflow.com/a/62170172
+    '''
+    assert len(img.shape) == 3
+    height, width, ch = img.shape
+    assert ch == 3
+
+    W = np.power(256, [[0], [1], [2]])  # per-channel weights: b + 256*g + 256**2*r
+
+    img_id = img.dot(W).squeeze(-1)  # collapse each (b, g, r) pixel into one integer id
+    values = np.unique(img_id)
+
+    mask = np.zeros(img_id.shape, dtype=np.uint8)
+
+    for c in values:
+        try:
+            mask[img_id == c] = color2index[tuple(img[img_id == c][0])]
+        except KeyError:
+            pass  # color not in the mapping; leave those pixels as 0
+    return mask
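
For reviewers who want to sanity-check the color-to-index conversion, here is a small usage sketch (not part of the patch). It assumes the patched agml/_internal/process_utils.py is importable; the tiny 2x2 BGR array and the printed result are illustrative only.

import numpy as np

from agml._internal.process_utils import rgb2mask

# Colors are (B, G, R) tuples, matching how cv2.imread returns pixel data.
color2index = {
    (0, 0, 0): 0,    # black -> non-vegetation
    (0, 0, 255): 1,  # red   -> crop
    (255, 0, 0): 2,  # blue  -> weed
}

# A toy 2x2 color mask: black, red / blue, red.
toy = np.array([[[0, 0, 0], [0, 0, 255]],
                [[255, 0, 0], [0, 0, 255]]], dtype=np.uint8)

print(rgb2mask(toy, color2index))
# Expected: one class index per pixel, i.e.
# [[0 1]
#  [2 1]]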
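
Once the processed archives are published, the two entries added to public_datasources.json would be consumed through AgML's normal loader. A rough sketch, assuming the standard agml.data.AgMLDataLoader interface (the exact structure of a returned sample may vary between AgML versions):

import agml

# The dataset name comes from the public_datasources.json entry added in this patch.
loader = agml.data.AgMLDataLoader('carrot_weeds_uk')

print(loader.num_images)   # expected to match the "n_images" metadata above
print(loader.classes)      # carrot / weeds / non-vegetation

# For semantic segmentation, each sample pairs an image with a pixel-wise index mask.
image, mask = loader[0]
print(image.shape, mask.shape)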