Skip to content

Commit

Permalink
Include RLE conversion in instances_to_coco_json
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: https://github.com/fairinternal/detectron2/pull/334

Test Plan:
```
./dev/run_inference_tests.sh configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml

python tools/train_net.py --config-file configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml --num-gpus 2 --eval-only MODEL.WEIGHTS ~/data/D2models/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x/144219072/model_final_571f7c.pkl
```

Differential Revision: D18676571

Pulled By: ppwwyyxx

fbshipit-source-id: b36e92b7173b9b094731524951d071fef032d5ff
  • Loading branch information
ppwwyyxx authored and yuxinwu committed Nov 25, 2019
1 parent 048610e commit 7a7ded0
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 43 deletions.
2 changes: 1 addition & 1 deletion datasets/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ coco/
panoptic_{train,val}2017.json
panoptic_{train,val}2017/
# png annotations
panoptic_stuff_{train,val}2017/ # generated by the script mentioned below
panoptic_stuff_{train,val}2017/ # generated by the script mentioned below
```

Install panopticapi by:
Expand Down
47 changes: 25 additions & 22 deletions detectron2/evaluation/coco_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,25 +98,7 @@ def process(self, inputs, outputs):
# TODO this is ugly
if "instances" in output:
instances = output["instances"].to(self._cpu_device)

if instances.has("pred_masks"):
# Use RLE to encode the masks, because they are too large and take memory
# since this evaluator stores outputs of the entire dataset.
# Our model may predict bool array, but cocoapi expects uint8
rles = [
mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
for mask in instances.pred_masks
]
for rle in rles:
# "counts" is an array encoded by mask_util as a byte-stream. Python3's
# json writer which always produces strings cannot serialize a bytestream
# unless you decode it. Thankfully, utf-8 works out (which is also what
# the pycocotools/_mask.pyx does).
rle["counts"] = rle["counts"].decode("utf-8")
instances.pred_masks_rle = rles
instances.remove("pred_masks")

prediction["instances"] = instances_to_json(instances, input["image_id"])
prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
if "proposals" in output:
prediction["proposals"] = output["proposals"].to(self._cpu_device)
self._predictions.append(prediction)
Expand Down Expand Up @@ -295,7 +277,17 @@ def _derive_coco_results(self, coco_eval, iou_type, class_names=None):
return results


def instances_to_json(instances, img_id):
def instances_to_coco_json(instances, img_id):
"""
Dump an "Instances" object to a COCO-format json that's used for evaluation.
Args:
instances (Instances):
img_id (int): the image id
Returns:
list[dict]: list of json annotations in COCO format.
"""
num_instance = len(instances)
if num_instance == 0:
return []
Expand All @@ -306,9 +298,20 @@ def instances_to_json(instances, img_id):
scores = instances.scores.tolist()
classes = instances.pred_classes.tolist()

has_mask = instances.has("pred_masks_rle")
has_mask = instances.has("pred_masks")
if has_mask:
rles = instances.pred_masks_rle
# use RLE to encode the masks, because they are too large and takes memory
# since this evaluator stores outputs of the entire dataset
rles = [
mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
for mask in instances.pred_masks
]
for rle in rles:
# "counts" is an array encoded by mask_util as a byte-stream. Python3's
# json writer which always produces strings cannot serialize a bytestream
# unless you decode it. Thankfully, utf-8 works out (which is also what
# the pycocotools/_mask.pyx does).
rle["counts"] = rle["counts"].decode("utf-8")

has_keypoints = instances.has("pred_keypoints")
if has_keypoints:
Expand Down
22 changes: 2 additions & 20 deletions detectron2/evaluation/lvis_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from detectron2.structures import Boxes, BoxMode, pairwise_iou
from detectron2.utils.logger import create_small_table

from .coco_evaluation import instances_to_json
from .coco_evaluation import instances_to_coco_json
from .evaluator import DatasetEvaluator


Expand Down Expand Up @@ -79,27 +79,9 @@ def process(self, inputs, outputs):
for input, output in zip(inputs, outputs):
prediction = {"image_id": input["image_id"]}

# TODO this is ugly
if "instances" in output:
instances = output["instances"].to(self._cpu_device)

if instances.has("pred_masks"):
# use RLE to encode the masks, because they are too large and takes memory
# since this evaluator stores outputs of the entire dataset
rles = [
mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
for mask in instances.pred_masks
]
for rle in rles:
# "counts" is an array encoded by mask_util as a byte-stream. Python3's
# json writer which always produces strings cannot serialize a bytestream
# unless you decode it. Thankfully, utf-8 works out (which is also what
# the pycocotools/_mask.pyx does).
rle["counts"] = rle["counts"].decode("utf-8")
instances.pred_masks_rle = rles
instances.remove("pred_masks")

prediction["instances"] = instances_to_json(instances, input["image_id"])
prediction["instances"] = instances_to_coco_json(instances, input["image_id"])
if "proposals" in output:
prediction["proposals"] = output["proposals"].to(self._cpu_device)
self._predictions.append(prediction)
Expand Down
16 changes: 16 additions & 0 deletions docs/tutorials/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,19 @@ In detectron2, the term "thing" is used for instance-level tasks,
and "stuff" is used for semantic segmentation tasks.
Both are used in panoptic segmentation.


### Update the Config for New Datasets

Once you've registered the dataset, you can use the name of the dataset (e.g., "my_dataset" in
the example above) in `DATASETS.{TRAIN,TEST}`.
There are other configs you might want to change to train or evaluate on new datasets:

* `MODEL.ROI_HEADS.NUM_CLASSES` and `MODEL.RETINANET.NUM_CLASSES` are the number of thing classes
for R-CNN and RetinaNet models.
* `MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS` sets the number of keypoints for Keypoint R-CNN.
You'll also need to set [Keypoint OKS](http://cocodataset.org/#keypoints-eval)
with `TEST.KEYPOINT_OKS_SIGMAS` for evaluation.
* `MODEL.SEM_SEG_HEAD.NUM_CLASSES` sets the number of stuff classes for Semantic FPN & Panoptic FPN.
* If you're training Fast R-CNN (with precomputed proposals), `DATASETS.PROPOSAL_FILES_{TRAIN,TEST}`
  need to match the datasets. The format of proposal files is documented
  [here](../modules/data.html#detectron2.data.load_proposals_into_dataset).

0 comments on commit 7a7ded0

Please sign in to comment.