From ef6cb2fafe7602126802f1d473d1fa4de27be04a Mon Sep 17 00:00:00 2001
From: Yotam Erel <erelyotam@gmail.com>
Date: Fri, 25 Aug 2023 12:45:53 +0900
Subject: [PATCH 1/2] add mirror_annotation; make cmd flags consistent with
 common practices.

---
 src/icatcher/cli.py           |  53 +++++-------
 src/icatcher/face_detector.py |   5 +-
 src/icatcher/options.py       | 156 +++++++++++++++++-----------------
 src/icatcher/video.py         |   2 +-
 tests/test_basic.py           |  54 +++++++++++-
 5 files changed, 159 insertions(+), 111 deletions(-)

diff --git a/src/icatcher/cli.py b/src/icatcher/cli.py
index 73d8f04..44c94d7 100644
--- a/src/icatcher/cli.py
+++ b/src/icatcher/cli.py
@@ -21,7 +21,6 @@
     parallelize_face_detection,
     detect_face_opencv_dnn,
 )
-from pathos.helpers import cpu_count
 from batch_face import RetinaFace
 
 
@@ -278,11 +277,12 @@ def create_output_streams(video_path, framerate, resolution, opt):
             prediction_output_file = Path(
                 opt.output_annotation, video_path.stem + opt.output_file_suffix
             )
-            if opt.output_format == "PrefLookTimestamp":
-                with open(prediction_output_file, "w", newline="") as f:  # Write header
-                    f.write(
-                        "Tracks: left, right, away, codingactive, outofframe\nTime,Duration,TrackName,comment\n\n"
-                    )
+            if prediction_output_file.exists():
+                if opt.overwrite:
+                    prediction_output_file.unlink()
+                else:
+                    raise FileExistsError("Annotation output file already exists. Use --overwrite flag to overwrite.")
+            
     return video_output_file, prediction_output_file, skip
 
 
@@ -367,11 +367,7 @@ def predict_from_video(opt):
         last_class_text = ""  # Initialize so that we see the first class assignment as an event to record
 
         # if going to use cpu parallelization, don't allow for live stream video
-        if use_cpu and opt.fd_model == "retinaface" and not opt.dont_buffer:
-            # figure out how many cpus can be used
-            num_cpus = cpu_count() - opt.num_cpus_saved
-            assert num_cpus > 0
-
+        if use_cpu and opt.fd_model == "retinaface" and opt.fd_parallel_processing:
             # send all frames in to be preprocessed and have faces detected prior to running gaze detection
             total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             vid_frames = range(
@@ -384,9 +380,9 @@ def predict_from_video(opt):
                 processed_frames[0].shape[0],
                 processed_frames[0].shape[1],
             )
-            logging.debug("face detection on buffered frames ...")
+            logging.info("performing face detection on buffered frames...")
             faces = parallelize_face_detection(
-                processed_frames, face_detector_model, num_cpus, opt
+                processed_frames, face_detector_model, opt.fd_num_cpus, opt
             )
             del processed_frames
 
@@ -413,7 +409,7 @@ def predict_from_video(opt):
             frames.append(frame)
 
             if (
-                use_cpu and opt.fd_model == "retinaface" and not opt.dont_buffer
+                use_cpu and opt.fd_model == "retinaface" and opt.fd_parallel_processing
             ):  # if using cpu, just pull from master
                 bboxes = master_bboxes[frame_count]
             elif opt.fd_model == "opencv_dnn":
@@ -528,6 +524,11 @@ def predict_from_video(opt):
                             corrected_transitions,
                         )
                 class_text = reverse_classes[answers[cursor]]
+                if opt.mirror_annotation:
+                    if class_text == "left":
+                        class_text = "right"
+                    elif class_text == "right":
+                        class_text = "left"                    
                 if opt.on_off:
                     class_text = "off" if class_text == "away" else "on"
                 if opt.output_video_path:
@@ -576,16 +577,6 @@ def predict_from_video(opt):
                                     confidences[cursor],
                                 )
                             )
-                    elif opt.output_format == "PrefLookTimestamp":
-                        if (
-                            class_text != last_class_text
-                        ):  # Record "event" for change of direction if code has changed
-                            frame_ms = int(
-                                (frame_count + cursor + 1) * (1000.0 / framerate)
-                            )
-                            with open(prediction_output_file, "a", newline="") as f:
-                                f.write("{},0,{}\n".format(frame_ms, class_text))
-                            last_class_text = class_text
                 logging.info(
                     "frame: {}, class: {}, confidence: {:.02f}, cur_fps: {:.02f}".format(
                         str(frame_count + cursor + 1),
@@ -624,12 +615,14 @@ def cleanup(
     if opt.output_video_path:
         video_output_file.release()
     if opt.output_annotation:  # write footer to file
-        if opt.output_format == "PrefLookTimestamp":
-            start_ms = int((1000.0 / framerate) * (opt.sliding_window_size // 2))
-            end_ms = int((1000.0 / framerate) * frame_count)
-            with open(prediction_output_file, "a", newline="") as f:
-                f.write("{},{},codingactive\n".format(start_ms, end_ms))
-        elif opt.output_format == "compressed":
+        if opt.output_format == "compressed":
+            answers = np.array(answers)
+            confidences = np.array(confidences)
+            if opt.mirror_annotation:
+                lefts = answers == classes["left"]
+                rights = answers == classes["right"]
+                answers[lefts] = classes["right"]
+                answers[rights] = classes["left"]
             np.savez(prediction_output_file, answers, confidences)
     cap.release()
 
diff --git a/src/icatcher/face_detector.py b/src/icatcher/face_detector.py
index b842a6e..b0ecf9e 100644
--- a/src/icatcher/face_detector.py
+++ b/src/icatcher/face_detector.py
@@ -4,6 +4,7 @@
 from pathos.pools import ProcessPool
 from icatcher import draw
 import logging
+from tqdm import tqdm
 
 
 def threshold_faces(all_faces: list, confidence_threshold: float):
@@ -60,12 +61,10 @@ def process_frames(cap, frames, h_start_at, h_end_at, w_start_at, w_end_at):
     :param h_end_at: optional crop coordinate
     :param w_start_at: optional crop coordinate
     :param w_end_at: optional crop coordinate
-    :param v
     :return: list of images corresponding to video frames
     """
     processed_frames = []
-    for frame in frames:
-        logging.debug("buffering frames {}/{}".format(frame, len(frames)))
+    for frame in tqdm(frames, desc="buffering frames"):
         cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
         ret, image = cap.read()
         if ret:
diff --git a/src/icatcher/options.py b/src/icatcher/options.py
index 2c1958e..876c293 100644
--- a/src/icatcher/options.py
+++ b/src/icatcher/options.py
@@ -1,7 +1,7 @@
 import argparse
 from pathlib import Path
 from . import version
-
+from pathos.helpers import cpu_count
 
 def parse_arguments(my_string=None):
     """
@@ -13,7 +13,7 @@ def parse_arguments(my_string=None):
     parser.add_argument(
         "source",
         type=str,
-        help="the source to use (path to video file, folder or webcam id)",
+        help="The source to use (path to video file, folder or webcam id).",
     )
     parser.add_argument(
         "--model",
@@ -25,12 +25,19 @@ def parse_arguments(my_string=None):
             "icatcher+_bw-cali.pth",
             "icatcher+_senegal.pth",
         ],
-        help="model file that will be used for gaze detection",
+        help="Model file that will be used for gaze detection.",
+    )
+    parser.add_argument(
+        "--fd_model",
+        type=str,
+        choices=["retinaface", "opencv_dnn"],
+        default="retinaface",
+        help="The face detector model used. opencv_dnn may be more suitable for cpu usage if speed is priority over accuracy.",
     )
     parser.add_argument(
         "--use_fc_model",
         action="store_true",
-        help="if supplied, will use face classifier "
+        help="If supplied, will use face classifier "
         "to decide which crop to use from every frame.",
     )
     parser.add_argument(
@@ -42,21 +49,21 @@ def parse_arguments(my_string=None):
             "face_classifier_cali-bw.pth",
             "face_classifier_senegal.pth",
         ],
-        help="face classifier model file that will be used for deciding "
-        "which crop should we select from every frame. ",
+        help="Face classifier model file that will be used for deciding "
+        "which crop should we select from every frame.",
     )
     parser.add_argument(
         "--source_type",
         type=str,
         default="file",
         choices=["file", "webcam"],
-        help="selects source of stream to use.",
+        help="Selects source of stream to use.",
     )
     parser.add_argument(
         "--crop_percent",
         type=int,
         default=0,
-        help="A percent to crop video frames to prevent other people from appearing",
+        help="A percent to crop video frames to prevent other people from appearing.",
     )
     parser.add_argument(
         "--crop_mode",
@@ -64,61 +71,47 @@ def parse_arguments(my_string=None):
         choices=["top", "left", "right"],
         nargs="+",
         default=["top"],
-        help="where to crop video from, multi-choice.",
+        help="Where to crop video from, multi-choice.",
     )
     parser.add_argument(
-        "--track_face",
+        "--show_output",
         action="store_true",
-        help="if detection is lost, will keep track of face using last known position.",
+        help="Show results online in a separate window.",
     )
     parser.add_argument(
-        "--show_output",
-        action="store_true",
-        help="show results online in a separate window",
+        "--output_annotation", type=str, help="Folder to output annotations to."
     )
     parser.add_argument(
-        "--output_annotation", type=str, help="folder to output annotations to"
+        "--overwrite", action="store_true",
+        help="If an output annotation file exists, will overwrite it. Without this flag iCatcher+ will terminate upon encountering an existing annotation file." 
     )
     parser.add_argument(
         "--on_off",
         action="store_true",
-        help="left/right/away annotations will be swapped with on/off (only works with icatcher+)",
+        help="Left/right/away annotations will be swapped with on/off.",
+    )
+    parser.add_argument(
+        "--mirror_annotation",
+        action="store_true",
+        help="Left will be swapped with right, and right will be swapped with left.",
     )
     parser.add_argument(
         "--output_format",
         type=str,
         default="raw_output",
-        choices=["raw_output", "compressed", "PrefLookTimestamp"],
-    )  # https://osf.io/3n97m/ - PrefLookTimestamp coding standard
+        choices=["raw_output", "compressed"],
+    )
     parser.add_argument(
         "--output_video_path",
-        help="if present, annotated video will be saved to this folder",
+        help="If present, annotated video will be saved to this folder.",
     )
     parser.add_argument(
         "--pic_in_pic",
         action="store_true",
-        help="if present, a mini picture with detection will be shown in the output video",
-    )
-    parser.add_argument(
-        "--output_file_suffix", type=str, default=".txt", help="the output file suffix"
-    )
-    parser.add_argument(
-        "--image_size",
-        type=int,
-        default=100,
-        help="All images will be resized to this size",
-    )
-    parser.add_argument(
-        "--sliding_window_size",
-        type=int,
-        default=9,
-        help="Number of frames in rolling window of each datapoint",
+        help="If present, a mini picture with detections will be shown in the output video.",
     )
     parser.add_argument(
-        "--window_stride",
-        type=int,
-        default=2,
-        help="Stride between frames in rolling window",
+        "--output_file_suffix", type=str, default=".txt", help="The output file suffix."
     )
     parser.add_argument(
         "--per_channel_mean",
@@ -126,7 +119,7 @@ def parse_arguments(my_string=None):
         metavar=("Channel1_mean", "Channel2_mean", "Channel3_mean"),
         type=float,
         default=[0.485, 0.456, 0.406],
-        help="supply custom per-channel mean of data for normalization",
+        help="Supply custom per-channel mean of data for normalization.",
     )
     parser.add_argument(
         "--per_channel_std",
@@ -134,80 +127,83 @@ def parse_arguments(my_string=None):
         metavar=("Channel1_std", "Channel2_std", "Channel3_std"),
         type=float,
         default=[0.229, 0.224, 0.225],
-        help="supply custom per-channel std of data for normalization",
+        help="Supply custom per-channel std of data for normalization.",
     )
     parser.add_argument(
         "--gpu_id", type=int, default=-1, help="GPU id to use, use -1 for CPU."
     )
     parser.add_argument("--log", help="If present, writes log to this path")
     parser.add_argument(
-        "-v",
         "--verbosity",
         type=str,
         choices=["debug", "info", "warning"],
         default="info",
-        help="Selects verbosity level",
+        help="Selects verbosity level.",
     )
     parser.add_argument(
         "--video_filter",
         type=str,
-        help="provided file will be used to filter only test videos,"
-        " will assume certain file structure using the lookit/cali-bw/senegal datasets",
-    )
-    parser.add_argument(
-        "--raw_dataset_path",
-        type=str,
-        help="path to raw dataset (required if --video_filter is passed",
-    )
-    parser.add_argument(
-        "--raw_dataset_type",
-        type=str,
-        choices=["lookit", "cali-bw", "senegal", "generic"],
-        default="lookit",
-        help="the type of dataset to preprocess",
+        help="Provided file will be used to filter only test videos,"
+        " will assume certain file structure using the lookit/cali-bw/senegal datasets.",
     )
     parser.add_argument(
         "--illegal_transitions_path",
         type=str,
-        help="path to CSV with illegal transitions to 'smooth' over",
+        help="Path to CSV with illegal transitions to 'smooth' over.",
     )
     parser.add_argument("--version", action="version", version="%(prog)s " + version)
     # face detection options:
-    parser.add_argument(
-        "--fd_model",
-        type=str,
-        choices=["retinaface", "opencv_dnn"],
-        default="retinaface",
-        help="the face detector model used. opencv_dnn may be more suitable for cpu usage if speed is priority over accuracy",
-    )
     parser.add_argument(
         "--fd_confidence_threshold",
         type=float,
-        help="the score confidence threshold that needs to be met for a face to be detected",
+        help="The score confidence threshold that needs to be met for a face to be detected.",
+    )
+    parser.add_argument(
+        "--fd_parallel_processing",
+        action="store_true",
+        default=False,
+        help="(cpu, retinaface only) face detection will be parallelized, by batching the frames (requires buffering them), increasing memory usage, but decreasing overall processing time. Disallows live stream of results.",
     )
     parser.add_argument(
-        "--num_cpus_saved",
+        "--fd_num_cpus",
         type=int,
-        default=0,
-        help="(retinaface only) amount of cpus to not use in parallel processing of face detection",
+        default=-1,
+        help="(cpu, retinaface only) amount of cpus to use if face detection parallel processing is true (-1: use all available cpus).",
     )
     parser.add_argument(
         "--fd_batch_size",
         type=int,
         default=16,
-        help="(retinaface only) amount of frames fed into face detector at one time for batch inference",
+        help="(cpu, retinaface only) amount of frames fed at once into face detector if parallel processing is true.",
     )
     parser.add_argument(
         "--fd_skip_frames",
         type=int,
         default=0,
-        help="(cpu only) amount of frames to skip between each face detection. previous bbox will be used",
+        help="(cpu, retinaface only) amount of frames to skip between each face detection if parallel processing is true. previous bbox will be used.",
     )
     parser.add_argument(
-        "--dont_buffer",
+        "--track_face",
         action="store_true",
-        default=False,
-        help="(cpu, retinaface only) frames will not be buffered, decreasing memory usage, but increasing processing time. Allows live stream of results.",
+        help="If detection is lost, will keep track of face using last known position. WARNING: untested experimental feature.",
+    )
+    parser.add_argument(
+        "--image_size",
+        type=int,
+        default=100,
+        help="All images will be resized to this size. WARNING: changing default results in untested behavior.",
+    )
+    parser.add_argument(
+        "--sliding_window_size",
+        type=int,
+        default=9,
+        help="Number of frames in rolling window of each datapoint. WARNING: changing default results in untested behavior.",
+    )
+    parser.add_argument(
+        "--window_stride",
+        type=int,
+        default=2,
+        help="Stride between frames in rolling window. WARNING: changing default results in untested behavior.",
     )
     if my_string is not None:
         args = parser.parse_args(my_string.split())
@@ -231,8 +227,6 @@ def parse_arguments(my_string=None):
         args.video_filter = Path(args.video_filter)
         if not args.video_filter.is_file() and not args.video_filter.is_dir():
             raise FileNotFoundError("Video filter is not a file or a folder")
-    if args.raw_dataset_path:
-        args.raw_dataset_path = Path(args.raw_dataset_path)
     if args.output_annotation:
         args.output_annotation = Path(args.output_annotation)
         args.output_annotation.mkdir(exist_ok=True, parents=True)
@@ -261,4 +255,14 @@ def parse_arguments(my_string=None):
 
         if not torch.cuda.is_available():
             raise ValueError("GPU is not available. Was torch compiled with CUDA?")
+    # figure out how many cpus can be used
+    use_cpu = True if args.gpu_id == -1 else False
+    if use_cpu:
+        if args.fd_num_cpus == -1:
+            args.fd_num_cpus = cpu_count()
+        else:
+            if args.fd_num_cpus > cpu_count():
+                raise ValueError(
+                    "Number of cpus requested is greater than available cpus"
+                )
     return args
diff --git a/src/icatcher/video.py b/src/icatcher/video.py
index 3f37533..aa92db9 100644
--- a/src/icatcher/video.py
+++ b/src/icatcher/video.py
@@ -163,5 +163,5 @@ def get_video_paths(opt):
             )
     else:
         # video_paths = [int(opt.source)]
-        raise NotImplementedError
+        raise NotImplementedError("sources other than video file or folder of videos are not currently supported.")
     return video_paths
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 8a5ff57..82296bc 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -1,10 +1,13 @@
 import pytest
 import numpy as np
 import icatcher
+from icatcher.cli import predict_from_video
 from pathlib import Path
 
-
 def test_parse_illegal_transitions():
+    """
+    tests handling the option "illegal transitions".
+    """
     bad_path1 = Path("tests/test_data/illegal_transitions_bad1.csv")
     bad_path2 = Path("tests/test_data/illegal_transitions_bad2.csv")
     bad_path3 = Path("tests/test_data/illegal_transitions_bad3.csv")
@@ -25,6 +28,9 @@ def test_parse_illegal_transitions():
 
 
 def test_process_video():
+    """
+    tests processing a video file.
+    """
     arguments = "tests/test_data/test.mp4"
     opt = icatcher.options.parse_arguments(arguments)
     source = Path(opt.source)
@@ -41,6 +47,52 @@ def test_process_video():
 
 
 def test_mask():
+    """
+    tests masking an image.
+    """
     image = np.random.random((256, 512, 3))
     masked = icatcher.draw.mask_regions(image, 0, 128, 0, 256)
     assert masked[:128, 256:, :].all() == 0
+
+
+@pytest.mark.parametrize(
+    "args_string",
+    [
+        "tests/test_data/test.mp4 --model icatcher+_lookit.pth --fd_model opencv_dnn --output_annotation tests/test_data --overwrite",
+        "tests/test_data/test.mp4 --model icatcher+_lookit.pth --fd_model opencv_dnn --output_annotation tests/test_data",
+        "tests/test_data/test.mp4 --model icatcher+_lookit.pth --fd_model opencv_dnn --output_annotation tests/test_data --mirror_annotation --overwrite",
+        "tests/test_data/test.mp4 --model icatcher+_lookit.pth --fd_model opencv_dnn --output_annotation tests/test_data --output_format compressed --overwrite",
+        "tests/test_data/test.mp4 --model icatcher+_lookit.pth --fd_model opencv_dnn --output_annotation tests/test_data --mirror_annotation --output_format compressed --overwrite",
+    ],
+)
+def test_predict_from_video(args_string):
+    """
+    runs entire prediction pipeline with several command line options.
+    """
+    args = icatcher.options.parse_arguments(args_string)
+    if not args.overwrite:
+        try:
+            predict_from_video(args)
+        except FileExistsError: # expected when an earlier parametrized case (or a previous run) already wrote the annotation file, since this case omits --overwrite
+            return
+    else:
+        predict_from_video(args)
+    if args.output_annotation:
+        if args.output_format == "compressed":
+            output_file = Path("tests/test_data/test.npz")
+            data = np.load(output_file)
+            predicted_classes = data["arr_0"]
+            confidences = data["arr_1"]
+        else:
+            output_file = Path("tests/test_data/test.txt")
+            with open(output_file, "r") as f:
+                data = f.readlines()
+            predicted_classes = [x.split(",")[1].strip() for x in data]
+            predicted_classes = np.array([icatcher.classes[x] for x in predicted_classes])
+            confidences = np.array([float(x.split(",")[2].strip()) for x in data])
+        assert len(predicted_classes) == len(confidences)
+        # assert len(predicted_classes) == 194 # hard coded number of frames in test video
+        if args.mirror_annotation:
+            assert (predicted_classes == 2).all()
+        else:
+            assert (predicted_classes == 1).all()
\ No newline at end of file

From c8aec7a0069ced30a5a22a0a3047ee3905a75d15 Mon Sep 17 00:00:00 2001
From: Yotam Erel <erelyotam@gmail.com>
Date: Fri, 25 Aug 2023 12:51:34 +0900
Subject: [PATCH 2/2] resolves #62

---
 pyproject.toml           | 4 ++--
 src/icatcher/__init__.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4df5dee..c8f987f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "icatcher"
-version = "0.1.2"
+version = "0.2.0"
 description = "iCatcher+: Robust and automated annotation of infant gaze from videos collected in laboratory, field, and online studies."
 readme = "README.md"
 authors = [{ name = "Yotam Erel", email = "erelyotam@gmail.com" }]
@@ -39,7 +39,7 @@ dev = ["pytest"]
 Homepage = "https://github.com/yoterel/icatcher_plus"
 
 [tool.bumpver]
-current_version = "0.1.2"
+current_version = "0.2.0"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit_message = "bump version {old_version} -> {new_version}"
 commit = false
diff --git a/src/icatcher/__init__.py b/src/icatcher/__init__.py
index ed870fb..e7d4644 100644
--- a/src/icatcher/__init__.py
+++ b/src/icatcher/__init__.py
@@ -1,5 +1,5 @@
 ### define version
-__version__ = "0.1.2"
+__version__ = "0.2.0"
 version = __version__
 ### define classes
 classes = {"noface": -2, "nobabyface": -1, "away": 0, "left": 1, "right": 2}