Skip to content

Commit

Permalink
fix: use new barcode normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Oct 7, 2024
1 parent c53617d commit 65e16dd
Show file tree
Hide file tree
Showing 7 changed files with 344 additions and 356 deletions.
639 changes: 334 additions & 305 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ lark = "~1.1.4"
h5py = "~3.8.0"
opencv-contrib-python-headless = "~4.10.0.84"
toml = "~0.10.2"
openfoodfacts = "1.1.1"
openfoodfacts = "1.1.2"
imagehash = "~4.3.1"
peewee-migrate = "~1.12.2"
diskcache = "~5.6.3"
Expand Down
10 changes: 3 additions & 7 deletions robotoff/app/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import requests
from falcon.media.validators import jsonschema
from openfoodfacts import OCRResult
from openfoodfacts.images import extract_barcode_from_url
from openfoodfacts.ocr import OCRParsingException, OCRResultGenerationException
from openfoodfacts.types import COUNTRY_CODE_TO_NAME, Country
from PIL import Image
Expand Down Expand Up @@ -62,12 +63,7 @@
batch_insert,
db,
)
from robotoff.off import (
OFFAuthentication,
generate_image_path,
generate_json_ocr_url,
get_barcode_from_url,
)
from robotoff.off import OFFAuthentication, generate_image_path, generate_json_ocr_url
from robotoff.prediction import image_classifier, ingredient_list
from robotoff.prediction.category import predict_category
from robotoff.prediction.langid import predict_lang
Expand Down Expand Up @@ -543,7 +539,7 @@ class OCRPredictionPredictorResource:
def on_get(self, req: falcon.Request, resp: falcon.Response):
ocr_url = req.get_param("ocr_url", required=True)
server_type = get_server_type_from_req(req)
barcode = get_barcode_from_url(ocr_url)
barcode = extract_barcode_from_url(ocr_url)
prediction_types = req.get_param_as_list(
"prediction_types",
default=DEFAULT_OCR_PREDICTION_TYPES,
Expand Down
4 changes: 2 additions & 2 deletions robotoff/cli/insights.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
import dacite
import orjson
import tqdm
from openfoodfacts.images import extract_barcode_from_path
from openfoodfacts.ocr import OCRResult

from robotoff.insights.extraction import DEFAULT_OCR_PREDICTION_TYPES
from robotoff.off import get_barcode_from_path
from robotoff.prediction.ocr import extract_predictions
from robotoff.prediction.ocr.core import ocr_content_iter
from robotoff.types import Prediction, PredictionType, ProductIdentifier, ServerType
Expand Down Expand Up @@ -83,7 +83,7 @@ def generate_from_ocr_archive(
if source_image is None:
continue

barcode: Optional[str] = get_barcode_from_path(source_image)
barcode = extract_barcode_from_path(source_image)

if barcode is None:
logger.warning("cannot extract barcode from source: %s", source_image)
Expand Down
5 changes: 3 additions & 2 deletions robotoff/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,10 +527,11 @@ def run_object_detection_model(
from urllib.parse import urlparse

import tqdm
from openfoodfacts.images import extract_barcode_from_url
from peewee import JOIN

from robotoff.models import ImageModel, ImagePrediction, db
from robotoff.off import generate_image_url, get_barcode_from_url
from robotoff.off import generate_image_url
from robotoff.utils import text_file_iter
from robotoff.workers.queues import enqueue_job, low_queue
from robotoff.workers.tasks.import_image import (
Expand Down Expand Up @@ -588,7 +589,7 @@ def run_object_detection_model(

if typer.confirm(f"{len(image_urls)} jobs are going to be launched, confirm?"):
for image_url in tqdm.tqdm(image_urls, desc="image"):
barcode = get_barcode_from_url(image_url)
barcode = extract_barcode_from_url(image_url)
if barcode is None:
raise RuntimeError()
enqueue_job(
Expand Down
17 changes: 0 additions & 17 deletions robotoff/off.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,6 @@ def get_source_from_url(ocr_url: str) -> str:
return url_path


def get_barcode_from_url(url: str) -> Optional[str]:
    """Return the barcode encoded in the path component of `url`.

    Only the path part of the parsed URL is considered; it is handed to
    `get_barcode_from_path` and that function's result is returned as is.

    :param url: the URL to inspect
    :return: whatever `get_barcode_from_path` returns for the URL path
    """
    return get_barcode_from_path(urlparse(url).path)


def get_barcode_from_path(path: str) -> Optional[str]:
    """Rebuild a barcode from the digit-only parent directories of `path`.

    The parents of `path` are walked from the closest one upwards. Each
    parent whose name consists solely of ASCII digits contributes one
    segment of the barcode; the walk stops at the first parent that does
    not. Segments are then joined in path order (outermost first).

    :param path: a file path such as `/images/products/541/012/1.jpg`
    :return: the concatenated digit segments, or None if the closest
        parent directory is not made of ASCII digits only
    """
    segments = []

    for parent in Path(path).parents:
        name = parent.name
        # `str.isdigit()` alone also accepts non-ASCII digit characters
        # (fullwidth digits, superscripts, ...), so require ASCII as well.
        if not (name.isascii() and name.isdigit()):
            break
        segments.append(name)

    # Parents were visited innermost first: reverse to restore path order.
    return "".join(reversed(segments)) or None


def _generate_file_path(product_id: ProductIdentifier, image_id: str, suffix: str):
    """Build the `/`-prefixed file path of an image for a product.

    The path is made of the parts returned by `split_barcode` for the
    product barcode, joined with `/`, followed by `image_id` and `suffix`.
    """
    barcode_dir = "/".join(split_barcode(product_id.barcode))
    return f"/{barcode_dir}/{image_id}{suffix}"
Expand Down
23 changes: 1 addition & 22 deletions tests/unit/test_off.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,6 @@
from typing import Optional

import pytest

from robotoff.off import get_barcode_from_url, get_source_from_url


@pytest.mark.parametrize(
    "url,output",
    [
        # Digit-only directories are concatenated, whatever the file extension
        (
            "https://world.openfoodfacts.org/images/products/541/012/672/6954/1.jpg",
            "5410126726954",
        ),
        (
            "https://world.openfoodfacts.org/images/products/541/012/672/6954/1.json",
            "5410126726954",
        ),
        # A non-digit closest parent directory yields no barcode
        ("https://world.openfoodfacts.org/images/products/invalid/1.json", None),
        ("https://world.openfoodfacts.org/images/products/252/535.bk/1.jpg", None),
    ],
)
def test_get_barcode_from_url(url: str, output: Optional[str]):
    """Check the barcode extracted from each URL against the expected value."""
    extracted = get_barcode_from_url(url)
    assert extracted == output
from robotoff.off import get_source_from_url


@pytest.mark.parametrize(
Expand Down

0 comments on commit 65e16dd

Please sign in to comment.