feat: Add image classifier models (#1421)
* feat: add support for image classifier models

Integration for Ultralytics v8 classification models through Triton

* chore: upgrade to latest version of triton server
raphael0202 authored Sep 30, 2024
1 parent 89f20ab commit bd369da
Showing 6 changed files with 219 additions and 29 deletions.
13 changes: 13 additions & 0 deletions Makefile
@@ -17,6 +17,8 @@ HOSTS=127.0.0.1 robotoff.openfoodfacts.localhost
 DOCKER_COMPOSE=docker compose --env-file=${ENV_FILE}
 DOCKER_COMPOSE_TEST=COMPOSE_PROJECT_NAME=robotoff_test COMMON_NET_NAME=po_test docker compose --env-file=${ENV_FILE}
 ML_OBJECT_DETECTION_MODELS := tf-universal-logo-detector tf-nutrition-table tf-nutriscore
+# Use bash shell for variable substitution
+SHELL := /bin/bash
 
 # Spellcheck
 SPELLCHECK_IMAGE_NAME = spellcheck-batch-vllm
@@ -148,6 +150,17 @@ dl-ingredient-detection-model:
 	tar -xzvf onnx.tar.gz --strip-component=1; \
 	rm onnx.tar.gz
 
+dl-image-clf-models:
+	@echo "⏬ Downloading image classification model files …"
+	mkdir -p models/triton; \
+	cd models/triton; \
+	for asset_name in 'price-proof-classification'; \
+	do \
+		dir=$${asset_name//-/_}/1; \
+		mkdir -p $${dir}; \
+		wget -cO - https://huggingface.co/openfoodfacts/$${asset_name}/resolve/main/weights/best.onnx > $${dir}/model.onnx; \
+	done;
+
 init-elasticsearch:
 	@echo "Initializing elasticsearch indices"
 	${DOCKER_COMPOSE} up -d elasticsearch 2>&1
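The new dl-image-clf-models target downloads each classifier's ONNX weights from Hugging Face into the directory layout Triton expects, i.e. <model_name>/<version>/model.onnx. The SHELL := /bin/bash line added above is what makes the $${asset_name//-/_} dash-to-underscore substitution work, since that expansion is a bash feature. For the single model currently listed, running make dl-image-clf-models should produce a tree like:

models/triton/
└── price_proof_classification/
    └── 1/
        └── model.onnx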
8 changes: 4 additions & 4 deletions docker/ml-gpu.yml
@@ -7,11 +7,11 @@
 services:
   triton:
     restart: $RESTART_POLICY
-    image: nvcr.io/nvidia/tritonserver:24.01-py3
+    image: nvcr.io/nvidia/tritonserver:24.08-py3
     ports:
-      - ${TRITON_EXPOSE_HTTP:-8000}:8000
-      - ${TRITON_EXPOSE_GRPC:-8001}:8001
-      - ${TRITON_EXPOSE_METRICS:-8002}:8002
+      - ${TRITON_EXPOSE_HTTP:-8000}:8000
+      - ${TRITON_EXPOSE_GRPC:-8001}:8001
+      - ${TRITON_EXPOSE_METRICS:-8002}:8002
     volumes:
       - ${TRITON_MODELS_DIR:-../models/triton}:/models
     # We need to add nvidia_entrypoint.sh for the GPU to be correctly detected
8 changes: 1 addition & 7 deletions docker/ml.yml
@@ -1,13 +1,7 @@
 services:
   triton:
     restart: $RESTART_POLICY
-    # This is a custom build of Triton with:
-    # - GRPC/HTTP support
-    # - CPU only (we don't have GPU in production)
-    # - Tensorflow 2 SavedModel and ONNX support
-    # This allows us to reduce the image size significantly
-    # See https://gist.github.com/raphael0202/091e521f2c79a8db8c6e9aceafb6e0b9 for build script
-    image: ghcr.io/openfoodfacts/triton:cpu
+    image: nvcr.io/nvidia/tritonserver:24.08-py3
     ports:
       - ${TRITON_EXPOSE_HTTP:-8000}:8000
       - ${TRITON_EXPOSE_GRPC:-8001}:8001
57 changes: 39 additions & 18 deletions robotoff/app/api.py
@@ -68,14 +68,15 @@
     generate_json_ocr_url,
     get_barcode_from_url,
 )
-from robotoff.prediction import ingredient_list
+from robotoff.prediction import image_classifier, ingredient_list
 from robotoff.prediction.category import predict_category
 from robotoff.prediction.langid import predict_lang
 from robotoff.prediction.object_detection import ObjectDetectionModelRegistry
 from robotoff.products import get_image_id, get_product, get_product_dataset_etag
 from robotoff.taxonomy import is_prefixed_value, match_taxonomized_value
 from robotoff.types import (
     BatchJobType,
+    ImageClassificationModel,
     InsightType,
     JSONType,
     NeuralCategoryClassifierModel,
@@ -858,27 +859,36 @@ def on_get(self, req: falcon.Request, resp: falcon.Response):
         image_url = req.get_param("image_url", required=True)
         models: list[str] = req.get_param_as_list("models", required=True)
 
-        available_models = ObjectDetectionModelRegistry.get_available_models()
+        available_object_detection_models = (
+            ObjectDetectionModelRegistry.get_available_models()
+        )
+        available_clf_models = list(ImageClassificationModel.__members__.keys())
+        available_models = available_object_detection_models + available_clf_models
 
         for model_name in models:
             if model_name not in available_models:
                 raise falcon.HTTPBadRequest(
                     "invalid_model",
-                    "unknown model {}, available models: {}"
-                    "".format(model_name, ", ".join(available_models)),
+                    f"unknown model {model_name}, available models: {', '.join(available_models)}",
                 )
 
         output_image = req.get_param_as_bool("output_image")
 
         if output_image is None:
             output_image = False
 
-        if output_image and len(models) != 1:
-            raise falcon.HTTPBadRequest(
-                "invalid_request",
-                "a single model must be specified with the `models` parameter "
-                "when `output_image` is True",
-            )
+        if output_image:
+            if len(models) != 1:
+                raise falcon.HTTPBadRequest(
+                    "invalid_request",
+                    "a single model must be specified with the `models` parameter "
+                    "when `output_image` is True",
+                )
+            if models[0] not in available_object_detection_models:
+                raise falcon.HTTPBadRequest(
+                    "invalid_request",
+                    f"model {models[0]} does not support image output",
+                )
 
         image = get_image_from_url(
             image_url, session=http_session, error_raise=False, use_cache=True
@@ -890,15 +900,26 @@ def on_get(self, req: falcon.Request, resp: falcon.Response):
         predictions = {}
 
         for model_name in models:
-            model = ObjectDetectionModelRegistry.get(model_name)
-            result = model.detect_from_image(image, output_image=output_image)
-
-            if output_image:
-                boxed_image = cast(Image.Image, result.boxed_image)
-                image_response(boxed_image, resp)
-                return
+            if model_name in available_object_detection_models:
+                model = ObjectDetectionModelRegistry.get(model_name)
+                result = model.detect_from_image(image, output_image=output_image)
+
+                if output_image:
+                    boxed_image = cast(Image.Image, result.boxed_image)
+                    image_response(boxed_image, resp)
+                    return
+                else:
+                    predictions[model_name] = result.to_json()
             else:
-                predictions[model_name] = result.to_json()
+                model_enum = ImageClassificationModel[model_name]
+                classifier = image_classifier.ImageClassifier(
+                    model_enum.name,
+                    label_names=image_classifier.LABEL_NAMES[model_enum],
+                )
+                predictions[model_name] = [
+                    {"label": label, "score": score}
+                    for label, score in classifier.predict(image)
+                ]
 
         resp.media = {"predictions": predictions}

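With this change, the models parameter of the endpoint can mix object detection and image classification models; classifier results are returned as a list of label/score pairs sorted by descending confidence. An illustrative response payload for models=price_proof_classification (the scores below are invented) could look like:

{
    "predictions": {
        "price_proof_classification": [
            {"label": "PRICE_TAG", "score": 0.91},
            {"label": "SHELF", "score": 0.05},
            {"label": "OTHER", "score": 0.02},
            {"label": "RECEIPT", "score": 0.01},
            {"label": "PRODUCT_WITH_PRICE", "score": 0.007},
            {"label": "WEB_PRINT", "score": 0.003}
        ]
    }
}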
158 changes: 158 additions & 0 deletions robotoff/prediction/image_classifier.py
@@ -0,0 +1,158 @@
import math
import time
import typing

import numpy as np
from PIL import Image, ImageOps
from tritonclient.grpc import service_pb2

from robotoff.triton import get_triton_inference_stub
from robotoff.types import ImageClassificationModel
from robotoff.utils import get_logger

logger = get_logger(__name__)


LABEL_NAMES = {
    ImageClassificationModel.price_proof_classification: [
        "OTHER",
        "PRICE_TAG",
        "PRODUCT_WITH_PRICE",
        "RECEIPT",
        "SHELF",
        "WEB_PRINT",
    ]
}


def classify_transforms(
    img: Image.Image,
    size: int = 224,
    mean=(0.0, 0.0, 0.0),
    std=(1.0, 1.0, 1.0),
    interpolation=Image.Resampling.BILINEAR,
    crop_fraction: float = 1.0,
) -> np.ndarray:
    """Apply a series of image transformations: resizing, center cropping,
    normalization, and conversion to a NumPy array.

    The transformation steps are based on the ones used in the Ultralytics
    library:
    https://github.com/ultralytics/ultralytics/blob/main/ultralytics/data/augment.py#L2319

    :param img: Input Pillow image.
    :param size: The target size for the transformed image (shortest edge).
    :param mean: Mean values for each RGB channel used in normalization.
    :param std: Standard deviation values for each RGB channel used in
        normalization.
    :param interpolation: Interpolation method from PIL (Image.Resampling.NEAREST,
        Image.Resampling.BILINEAR, Image.Resampling.BICUBIC).
    :param crop_fraction: Fraction of the image to be cropped.
    :return: The transformed image as a NumPy array.
    """
    if img.mode != "RGB":
        img = img.convert("RGB")

    # Rotate the image based on the EXIF orientation if needed
    img = typing.cast(Image.Image, ImageOps.exif_transpose(img))

    # Step 1: Resize while preserving the aspect ratio
    width, height = img.size

    # Calculate the scale size while preserving the aspect ratio
    scale_size = math.floor(size / crop_fraction)

    aspect_ratio = width / height
    if width < height:
        new_width = scale_size
        new_height = int(new_width / aspect_ratio)
    else:
        new_height = scale_size
        new_width = int(new_height * aspect_ratio)

    img = img.resize((new_width, new_height), interpolation)

    # Step 2: Center crop
    left = (new_width - size) // 2
    top = (new_height - size) // 2
    right = left + size
    bottom = top + size
    img = img.crop((left, top, right, bottom))

    # Step 3: Convert the image to a NumPy array and scale pixel values to [0, 1]
    img_array = np.array(img).astype(np.float32) / 255.0

    # Step 4: Normalize the image
    mean = np.array(mean, dtype=np.float32).reshape(1, 1, 3)
    std = np.array(std, dtype=np.float32).reshape(1, 1, 3)
    img_array = (img_array - mean) / std

    # Step 5: Change the order of dimensions from (H, W, C) to (C, H, W)
    img_array = np.transpose(img_array, (2, 0, 1))
    return img_array


class ImageClassifier:
    def __init__(self, name: str, label_names: list[str]):
        self.name: str = name
        self.label_names = label_names

    def predict(
        self,
        image: Image.Image,
        triton_uri: str | None = None,
    ) -> list[tuple[str, float]]:
        """Run an image classification model on an image.

        The model is expected to have been trained with the Ultralytics
        library (YOLOv8).

        :param image: the input Pillow image
        :param triton_uri: URI of the Triton Inference Server, defaults to
            None. If not provided, the default value from settings is used.
        :return: the prediction results as a list of (label, confidence)
            tuples, sorted by descending confidence
        """
        image_array = classify_transforms(image)
        image_array = np.expand_dims(image_array, axis=0)

        grpc_stub = get_triton_inference_stub(triton_uri)
        request = service_pb2.ModelInferRequest()
        request.model_name = self.name

        image_input = service_pb2.ModelInferRequest().InferInputTensor()
        image_input.name = "images"
        image_input.datatype = "FP32"
        image_input.shape.extend([1, 3, 224, 224])
        request.inputs.extend([image_input])

        output = service_pb2.ModelInferRequest().InferRequestedOutputTensor()
        output.name = "output0"
        request.outputs.extend([output])

        request.raw_input_contents.extend([image_array.tobytes()])
        start_time = time.monotonic()
        response = grpc_stub.ModelInfer(request)
        latency = time.monotonic() - start_time

        logger.debug("Inference time for %s: %s", self.name, latency)

        start_time = time.monotonic()
        if len(response.outputs) != 1:
            raise Exception(f"expected 1 output, got {len(response.outputs)}")

        if len(response.raw_output_contents) != 1:
            raise Exception(
                f"expected 1 raw output content, got {len(response.raw_output_contents)}"
            )

        output_index = {output.name: i for i, output in enumerate(response.outputs)}
        output = np.frombuffer(
            response.raw_output_contents[output_index["output0"]],
            dtype=np.float32,
        ).reshape((1, len(self.label_names)))[0]

        # Indices of the scores sorted in descending order
        score_indices = np.argsort(-output)

        latency = time.monotonic() - start_time
        logger.debug("Post-processing time for %s: %s", self.name, latency)
        return [(self.label_names[i], float(output[i])) for i in score_indices]
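A minimal usage sketch for the new module, assuming a Triton server reachable at the default URI from settings with the price_proof_classification model loaded (the image filename is hypothetical):

from PIL import Image

from robotoff.prediction.image_classifier import LABEL_NAMES, ImageClassifier
from robotoff.types import ImageClassificationModel

model = ImageClassificationModel.price_proof_classification
classifier = ImageClassifier(model.name, label_names=LABEL_NAMES[model])

# "proof.jpg" is a hypothetical local file; any Pillow-readable image works.
image = Image.open("proof.jpg")

# predict() preprocesses the image (resize, center crop, CHW float32 tensor),
# runs inference on Triton over gRPC, and returns (label, confidence) tuples
# sorted by descending confidence.
for label, confidence in classifier.predict(image):
    print(f"{label}: {confidence:.3f}")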
4 changes: 4 additions & 0 deletions robotoff/types.py
@@ -35,6 +35,10 @@ def get_type(self) -> str:
         return "universal-logo-detector"
 
 
+class ImageClassificationModel(str, enum.Enum):
+    price_proof_classification = enum.auto()
+
+
 @enum.unique
 class NeuralCategoryClassifierModel(enum.Enum):
     keras_image_embeddings_3_0 = "keras-image-embeddings-3.0"
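With this enum in place, registering an additional classifier appears to be a three-step change, judging from the rest of this commit: add a member to ImageClassificationModel, add its ordered label list to LABEL_NAMES in robotoff/prediction/image_classifier.py, and list the corresponding Hugging Face asset name in the dl-image-clf-models Makefile target so the ONNX weights land in the Triton model repository.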