
Put back Letterbox transform #212

Merged: 9 commits, Jun 23, 2024
48 changes: 47 additions & 1 deletion pyroengine/utils.py
@@ -4,10 +4,11 @@
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.


+import cv2  # type: ignore[import-untyped]
import numpy as np
from tqdm import tqdm  # type: ignore[import-untyped]

-__all__ = ["nms", "xywh2xyxy", "DownloadProgressBar"]
+__all__ = ["nms", "xywh2xyxy", "DownloadProgressBar", "letterbox"]


def xywh2xyxy(x: np.ndarray):
@@ -19,6 +20,51 @@
    return y


+def letterbox(
+    im: np.ndarray, new_shape: tuple = (640, 640), color: tuple = (114, 114, 114), auto: bool = False, stride: int = 32
+):
+    """Letterbox image transform for YOLO models
+
+    Args:
+        im (np.ndarray): Input image
+        new_shape (tuple, optional): Image size. Defaults to (640, 640).
+        color (tuple, optional): Pixel fill value for the area outside the transformed image.
+            Defaults to (114, 114, 114).
+        auto (bool, optional): auto padding. Defaults to False.
+        stride (int, optional): padding stride. Defaults to 32.
+
+    Returns:
+        tuple: Letterboxed image and the (left, top) padding offsets
+    """
+    # Resize and pad image while meeting stride-multiple constraints
+    im = np.array(im)
+    shape = im.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+
+    # Compute padding
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+    if auto:  # minimum rectangle
+        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    # add border
+    h, w = im.shape[:2]
+    im_b = np.zeros((h + top + bottom, w + left + right, 3)) + color
+    im_b[top : top + h, left : left + w, :] = im
+
+    return im_b.astype("uint8"), (left, top)

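For context, a minimal usage sketch of the new transform (the 1280x720 frame is an illustrative assumption, not part of the PR):

import numpy as np

from pyroengine.utils import letterbox

# A dummy 720p RGB frame; any HxWx3 uint8 array works.
frame = np.zeros((720, 1280, 3), dtype=np.uint8)

# Resize to 640x640 with gray (114) padding; the second value is the (left, top) offset.
out, (left, top) = letterbox(frame, new_shape=(640, 640))
assert out.shape == (640, 640, 3) and out.dtype == np.uint8
# r = min(640/720, 640/1280) = 0.5, so the content becomes 640x360 and the
# remaining 280 px of height are split into 140 px bands top and bottom.
assert (left, top) == (0, 140)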

def box_iou(box1: np.ndarray, box2: np.ndarray, eps: float = 1e-7):
    """
    Calculate intersection-over-union (IoU) of boxes.
33 changes: 16 additions & 17 deletions pyroengine/vision.py
@@ -8,13 +8,12 @@
from typing import Optional, Tuple
from urllib.request import urlretrieve

-import cv2  # type: ignore[import-untyped]
import numpy as np
import onnxruntime
from huggingface_hub import HfApi  # type: ignore[import-untyped]
from PIL import Image

-from .utils import DownloadProgressBar, nms, xywh2xyxy
+from .utils import DownloadProgressBar, letterbox, nms, xywh2xyxy

__all__ = ["Classifier"]

@@ -41,7 +40,7 @@ class Classifier:
        model_path: model path
    """

-    def __init__(self, model_path: Optional[str] = "data/model.onnx", base_img_size: int = 640) -> None:
+    def __init__(self, model_path: Optional[str] = "data/model.onnx", img_size: tuple = (640, 640)) -> None:
        if model_path is None:
            model_path = "data/model.onnx"

@@ -67,7 +66,7 @@ def __init__(self, model_path: Optional[str] = "data/model.onnx", base_img_size:
            self.download_model(model_path, expected_sha256)

        self.ort_session = onnxruntime.InferenceSession(model_path)
-        self.base_img_size = base_img_size
+        self.img_size = img_size

    def get_sha(self, siblings):
        # Extract the SHA256 hash from the model files metadata
@@ -99,7 +98,7 @@ def load_metadata(self, metadata_path):
            return json.load(f)
        return None

-    def preprocess_image(self, pil_img: Image.Image, new_img_size: list) -> Tuple[np.ndarray, Tuple[int, int]]:
+    def preprocess_image(self, pil_img: Image.Image) -> Tuple[np.ndarray, Tuple[int, int]]:
"""Preprocess an image for inference

Args:
Expand All @@ -111,20 +110,15 @@ def preprocess_image(self, pil_img: Image.Image, new_img_size: list) -> Tuple[np
- Padding information as a tuple of integers (pad_height, pad_width).
"""

np_img = cv2.resize(np.array(pil_img), new_img_size, interpolation=cv2.INTER_LINEAR)
np_img, pad = letterbox(np.array(pil_img), self.img_size) # Applies letterbox resize with padding
np_img = np.expand_dims(np_img.astype("float"), axis=0) # Add batch dimension
np_img = np.ascontiguousarray(np_img.transpose((0, 3, 1, 2))) # Convert from BHWC to BCHW format
np_img = np_img.astype("float32") / 255 # Normalize to [0, 1]

return np_img
return np_img, pad
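A quick shape check of the new preprocessing path, mirroring the updated test further down (the 1280x720 input is again an assumption):

import numpy as np
from PIL import Image

from pyroengine.vision import Classifier

clf = Classifier()  # downloads data/model.onnx on first use
img = Image.fromarray(np.zeros((720, 1280, 3), dtype=np.uint8))
tensor, (left, top) = clf.preprocess_image(img)
# Letterboxed to the default (640, 640), then BHWC -> BCHW and scaled to [0, 1]
assert tensor.shape == (1, 3, 640, 640) and tensor.dtype == np.float32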

    def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] = None) -> np.ndarray:

-        w, h = pil_img.size
-        ratio = self.base_img_size / max(w, h)
-        new_img_size = [int(ratio * w), int(ratio * h)]
-        new_img_size = [x - x % 32 for x in new_img_size]  # size need to be a multiple of 32 to fit the model
-        np_img = self.preprocess_image(pil_img, new_img_size)
+        np_img, pad = self.preprocess_image(pil_img)

        # ONNX inference
        y = self.ort_session.run(["output0"], {"images": np_img})[0][0]
@@ -136,12 +130,17 @@ def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] =
        # Sort by confidence
        y = y[y[:, 4].argsort()]
        y = nms(y)
+        y = y[::-1]

        # Normalize preds
        if len(y) > 0:
-            # Normalize Output
-            y[:, :4:2] /= new_img_size[0]
-            y[:, 1:4:2] /= new_img_size[1]
+            # Remove padding
+            left_pad, top_pad = pad
+            y[:, :4:2] -= left_pad
+            y[:, 1:4:2] -= top_pad
+            y[:, :4:2] /= self.img_size[1] - 2 * left_pad
+            y[:, 1:4:2] /= self.img_size[0] - 2 * top_pad
+            y = np.clip(y, 0, 1)
        else:
            y = np.zeros((0, 5))  # normalize output

@@ -162,4 +161,4 @@ def __call__(self, pil_img: Image.Image, occlusion_mask: Optional[np.ndarray] =

        y = y[keep]

-        return np.clip(y, 0, 1)
+        return y
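The padding-removal arithmetic above maps boxes from the padded 640x640 model space back to [0, 1] fractions of the original content. A small sanity check, reusing the (left, top) = (0, 140) case from the letterbox sketch above (hypothetical box coordinates):

# A box spanning the full un-padded content area of the 640x640 model input:
left_pad, top_pad = 0, 140
x1, y1, x2, y2 = 0.0, 140.0, 640.0, 500.0
# Subtract the offsets, then divide by the un-padded content size on each axis.
x1, x2 = (x1 - left_pad) / (640 - 2 * left_pad), (x2 - left_pad) / (640 - 2 * left_pad)
y1, y2 = (y1 - top_pad) / (640 - 2 * top_pad), (y2 - top_pad) / (640 - 2 * top_pad)
assert (x1, y1, x2, y2) == (0.0, 0.0, 1.0, 1.0)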
11 changes: 7 additions & 4 deletions tests/test_vision.py
@@ -30,22 +30,25 @@ def test_classifier(mock_wildfire_image):
    # Instantiate the ONNX model
    model = Classifier()
    # Check preprocessing
-    out = model.preprocess_image(mock_wildfire_image, (640, 384))
+    out, pad = model.preprocess_image(mock_wildfire_image)
    assert isinstance(out, np.ndarray) and out.dtype == np.float32
-    assert out.shape == (1, 3, 384, 640)
+    assert out.shape == (1, 3, 640, 640)
+    assert isinstance(pad, tuple)
    # Check inference
    out = model(mock_wildfire_image)
    assert out.shape == (1, 5)
    conf = np.max(out[:, 4])
    assert conf >= 0 and conf <= 1

    # Test mask
-    mask = np.ones((640, 384))
+    mask = np.ones((384, 640))
    out = model(mock_wildfire_image, mask)
+    print(out)
    assert out.shape == (1, 5)

-    mask = np.zeros((640, 384))
+    mask = np.zeros((384, 640))
    out = model(mock_wildfire_image, mask)
+    print(out)
    assert out.shape == (0, 5)
    os.remove(model_path)
    os.remove(METADATA_PATH)
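To exercise these assertions locally, something along these lines should work (assuming the repository's standard pytest setup):

pytest tests/test_vision.py -k test_classifier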