Merge pull request #63 from yoterel/master
adds mirror_annotation and improves command-line options consistency.
yoterel authored Aug 25, 2023
2 parents 1cf0d06 + 70411cd commit dbce3ac
Showing 7 changed files with 162 additions and 114 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "icatcher"
-version = "0.1.2"
+version = "0.2.0"
 description = "iCatcher+: Robust and automated annotation of infant gaze from videos collected in laboratory, field, and online studies."
 readme = "README.md"
 authors = [{ name = "Yotam Erel", email = "[email protected]" }]
@@ -39,7 +39,7 @@ dev = ["pytest"]
 Homepage = "https://github.com/yoterel/icatcher_plus"
 
 [tool.bumpver]
-current_version = "0.1.2"
+current_version = "0.2.0"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit_message = "bump version {old_version} -> {new_version}"
 commit = false
2 changes: 1 addition & 1 deletion src/icatcher/__init__.py
@@ -1,5 +1,5 @@
 ### define version
-__version__ = "0.1.2"
+__version__ = "0.2.0"
 version = __version__
 ### define classes
 classes = {"noface": -2, "nobabyface": -1, "away": 0, "left": 1, "right": 2}
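For orientation, this `classes` mapping is what the new mirror_annotation code paths in cli.py key off, and cli.py reads predictions back through a `reverse_classes` lookup. A minimal sketch of how such a lookup could be derived (the derivation is an assumed reconstruction; only `classes` itself appears in this diff):

# Assumed reconstruction: reverse lookup from class id back to label,
# matching the reverse_classes name used in cli.py below.
classes = {"noface": -2, "nobabyface": -1, "away": 0, "left": 1, "right": 2}
reverse_classes = {v: k for k, v in classes.items()}

assert reverse_classes[1] == "left"
assert reverse_classes[2] == "right"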
53 changes: 23 additions & 30 deletions src/icatcher/cli.py
@@ -21,7 +21,6 @@
     parallelize_face_detection,
     detect_face_opencv_dnn,
 )
-from pathos.helpers import cpu_count
 from batch_face import RetinaFace


@@ -278,11 +277,12 @@ def create_output_streams(video_path, framerate, resolution, opt):
         prediction_output_file = Path(
             opt.output_annotation, video_path.stem + opt.output_file_suffix
         )
-        if opt.output_format == "PrefLookTimestamp":
-            with open(prediction_output_file, "w", newline="") as f:  # Write header
-                f.write(
-                    "Tracks: left, right, away, codingactive, outofframe\nTime,Duration,TrackName,comment\n\n"
-                )
+        if prediction_output_file.exists():
+            if opt.overwrite:
+                prediction_output_file.unlink()
+            else:
+                raise FileExistsError("Annotation output file already exists. Use --overwrite flag to overwrite.")
 
     return video_output_file, prediction_output_file, skip
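The replaced header-writing logic means annotation output is now guarded against accidental clobbering: reruns fail fast unless overwriting is requested. A self-contained sketch of the same guard, with a made-up path and flag value:

# Standalone sketch of the overwrite guard above; path and flag are made up.
from pathlib import Path

prediction_output_file = Path("/tmp/myvideo_annotation.txt")
overwrite = False  # corresponds to the new --overwrite flag

if prediction_output_file.exists():
    if overwrite:
        prediction_output_file.unlink()  # remove the stale file and proceed
    else:
        raise FileExistsError(
            "Annotation output file already exists. Use --overwrite flag to overwrite."
        )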


@@ -367,11 +367,7 @@ def predict_from_video(opt):
     last_class_text = ""  # Initialize so that we see the first class assignment as an event to record
 
     # if going to use cpu parallelization, don't allow for live stream video
-    if use_cpu and opt.fd_model == "retinaface" and not opt.dont_buffer:
-        # figure out how many cpus can be used
-        num_cpus = cpu_count() - opt.num_cpus_saved
-        assert num_cpus > 0
-
+    if use_cpu and opt.fd_model == "retinaface" and opt.fd_parallel_processing:
         # send all frames in to be preprocessed and have faces detected prior to running gaze detection
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
         vid_frames = range(
@@ -384,9 +380,9 @@ def predict_from_video(opt):
             processed_frames[0].shape[0],
             processed_frames[0].shape[1],
         )
-        logging.debug("face detection on buffered frames ...")
+        logging.info("performing face detection on buffered frames...")
         faces = parallelize_face_detection(
-            processed_frames, face_detector_model, num_cpus, opt
+            processed_frames, face_detector_model, opt.fd_num_cpus, opt
         )
         del processed_frames
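The worker count now comes from the explicit `opt.fd_num_cpus` option rather than being computed inline from `cpu_count() - opt.num_cpus_saved` (hence the import removed at the top of this file). A hedged sketch of how a default for that option could be derived, reusing the helper the old code imported; the "leave one CPU free" choice is an assumption, not part of this commit:

# Sketch: deriving a default for --fd_num_cpus, echoing the removed
# cpu_count() - num_cpus_saved logic.
from pathos.helpers import cpu_count

num_cpus_saved = 1  # hypothetical: CPUs to keep free for other work
fd_num_cpus = max(1, cpu_count() - num_cpus_saved)
assert fd_num_cpus > 0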

@@ -413,7 +409,7 @@ def predict_from_video(opt):
             frames.append(frame)
 
             if (
-                use_cpu and opt.fd_model == "retinaface" and not opt.dont_buffer
+                use_cpu and opt.fd_model == "retinaface" and opt.fd_parallel_processing
             ):  # if using cpu, just pull from master
                 bboxes = master_bboxes[frame_count]
             elif opt.fd_model == "opencv_dnn":
@@ -528,6 +524,11 @@ def predict_from_video(opt):
                         corrected_transitions,
                     )
                     class_text = reverse_classes[answers[cursor]]
+                    if opt.mirror_annotation:
+                        if class_text == "left":
+                            class_text = "right"
+                        elif class_text == "right":
+                            class_text = "left"
                     if opt.on_off:
                         class_text = "off" if class_text == "away" else "on"
                     if opt.output_video_path:
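The same left/right swap reappears in vectorized form in `cleanup` below. As a compact alternative to the chained conditionals, a dictionary-based helper (a hypothetical refactor, not part of this commit) behaves identically, passing all other labels through:

# Hypothetical helper equivalent to the if/elif swap added above.
def mirror_label(class_text: str) -> str:
    return {"left": "right", "right": "left"}.get(class_text, class_text)

assert mirror_label("left") == "right"
assert mirror_label("right") == "left"
assert mirror_label("away") == "away"  # non-lateral labels are unchanged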
@@ -576,16 +577,6 @@ def predict_from_video(opt):
                                 confidences[cursor],
                             )
                         )
-                elif opt.output_format == "PrefLookTimestamp":
-                    if (
-                        class_text != last_class_text
-                    ):  # Record "event" for change of direction if code has changed
-                        frame_ms = int(
-                            (frame_count + cursor + 1) * (1000.0 / framerate)
-                        )
-                        with open(prediction_output_file, "a", newline="") as f:
-                            f.write("{},0,{}\n".format(frame_ms, class_text))
-                        last_class_text = class_text
                 logging.info(
                     "frame: {}, class: {}, confidence: {:.02f}, cur_fps: {:.02f}".format(
                         str(frame_count + cursor + 1),
@@ -624,12 +615,14 @@ def cleanup(
     if opt.output_video_path:
         video_output_file.release()
     if opt.output_annotation:  # write footer to file
-        if opt.output_format == "PrefLookTimestamp":
-            start_ms = int((1000.0 / framerate) * (opt.sliding_window_size // 2))
-            end_ms = int((1000.0 / framerate) * frame_count)
-            with open(prediction_output_file, "a", newline="") as f:
-                f.write("{},{},codingactive\n".format(start_ms, end_ms))
-        elif opt.output_format == "compressed":
+        if opt.output_format == "compressed":
             answers = np.array(answers)
             confidences = np.array(confidences)
+            if opt.mirror_annotation:
+                lefts = answers == classes["left"]
+                rights = answers == classes["right"]
+                answers[lefts] = classes["right"]
+                answers[rights] = classes["left"]
             np.savez(prediction_output_file, answers, confidences)
     cap.release()
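Note that the two boolean masks are computed before either assignment, which is what prevents a value from being swapped twice. A worked example using the class ids from `__init__.py` (left=1, right=2); the sample array is fabricated:

# Worked example of the mask-based swap; the answers array is made up.
import numpy as np

classes = {"noface": -2, "nobabyface": -1, "away": 0, "left": 1, "right": 2}
answers = np.array([0, 1, 2, 1, -2])  # away, left, right, left, noface

lefts = answers == classes["left"]    # masks captured before any writes
rights = answers == classes["right"]
answers[lefts] = classes["right"]
answers[rights] = classes["left"]

assert answers.tolist() == [0, 2, 1, 2, -2]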

5 changes: 2 additions & 3 deletions src/icatcher/face_detector.py
@@ -4,6 +4,7 @@
 from pathos.pools import ProcessPool
 from icatcher import draw
 import logging
+from tqdm import tqdm
 
 
 def threshold_faces(all_faces: list, confidence_threshold: float):
@@ -60,12 +61,10 @@ def process_frames(cap, frames, h_start_at, h_end_at, w_start_at, w_end_at):
     :param h_end_at: optional crop coordinate
     :param w_start_at: optional crop coordinate
     :param w_end_at: optional crop coordinate
-    :param v
     :return: list of images corresponding to video frames
     """
     processed_frames = []
-    for frame in frames:
-        logging.debug("buffering frames {}/{}".format(frame, len(frames)))
+    for frame in tqdm(frames, desc="buffering frames"):
         cap.set(cv2.CAP_PROP_POS_FRAMES, frame)
         ret, image = cap.read()
         if ret:
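Replacing the per-frame debug log with tqdm gives a live progress bar while frames are buffered. A self-contained sketch of the same seek-and-read pattern; the video path and frame stride are placeholders:

# Sketch of the buffering loop with a tqdm progress bar; the video path
# and frame indices are placeholders.
import cv2
from tqdm import tqdm

cap = cv2.VideoCapture("example.mp4")
frames = range(0, 300, 10)  # every 10th of the first 300 frames

processed_frames = []
for frame in tqdm(frames, desc="buffering frames"):
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame)  # seek to this frame index
    ret, image = cap.read()
    if ret:
        processed_frames.append(image)
cap.release()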
(Diffs for the remaining 3 of the 7 changed files are not shown.)
