Skip to content

Commit

Permalink
fix: fix mypy and flake8 issues
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Sep 10, 2024
1 parent 019f6c9 commit 0d9d34a
Show file tree
Hide file tree
Showing 12 changed files with 987 additions and 1,010 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ max-line-length = 88
exclude = .git,__pycache__,build,dist,*_pb2.py,.venv
per-file-ignores =
robotoff/cli/main.py:B008
max-doc-length = 79
max-doc-length = 88
13 changes: 6 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@ DOCKER_COMPOSE_TEST=COMPOSE_PROJECT_NAME=robotoff_test COMMON_NET_NAME=po_test d
ML_OBJECT_DETECTION_MODELS := tf-universal-logo-detector tf-nutrition-table tf-nutriscore

# Spellcheck
IMAGE_NAME = spellcheck-batch-vllm
TAG = latest
GCLOUD_LOCATION = europe-west9-docker.pkg.dev
REGISTRY = ${GCLOUD_LOCATION}/robotoff/gcf-artifacts
SPELLCHECK_IMAGE_NAME = spellcheck-batch-vllm
SPELLCHECK_TAG = latest
SPELLCHECK_REGISTRY = europe-west9-docker.pkg.dev/robotoff/gcf-artifacts

.DEFAULT_GOAL := dev
# avoid target corresponding to file names, to depends on them
Expand Down Expand Up @@ -300,12 +299,12 @@ create-po-default-network:

# Spellcheck
build-spellcheck:
docker build -f batch/spellcheck/Dockerfile -t $(IMAGE_NAME):$(TAG) batch/spellcheck
docker build -f batch/spellcheck/Dockerfile -t $(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG) batch/spellcheck

# Push the image to the registry
push-spellcheck:
docker tag $(IMAGE_NAME):$(TAG) $(REGISTRY)/$(IMAGE_NAME):$(TAG)
docker push $(REGISTRY)/$(IMAGE_NAME):$(TAG)
docker tag $(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG) $(SPELLCHECK_REGISTRY)/$(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG)
docker push $(SPELLCHECK_REGISTRY)/$(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG)

# Build and push in one command
deploy-spellcheck:
Expand Down
1 change: 1 addition & 0 deletions docker/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ x-robotoff-dev: &robotoff-dev
- ./pyproject.toml:/opt/robotoff/pyproject.toml
- ./poetry.toml:/opt/robotoff/poetry.toml
- ./poetry.lock:/opt/robotoff/poetry.lock
- ./.flake8:/opt/robotoff/.flake8
# make tests available
- ./tests:/opt/robotoff/tests
- ./.cov:/opt/robotoff/.cov
Expand Down
1,922 changes: 951 additions & 971 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ rq-dashboard = "~0.6.1"

[tool.poetry.group.dev.dependencies]
types-pytz = "^2024.1.0.20240417"
types-pyyaml = "^6.0.12.20240808"

[tool.poetry.scripts]
robotoff-cli = 'robotoff.cli.main:main'
2 changes: 0 additions & 2 deletions robotoff/app/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ def validate_token(token: str, ref_token_name: str) -> bool:
"""Validate token.
:param token: Authentication token
:type token: str
:param api_token_name: Validation token, stored in environment variables.
:type api_token_name: str
"""
api_token = os.getenv(ref_token_name.upper())
if not api_token:
Expand Down
5 changes: 0 additions & 5 deletions robotoff/batch/buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@ def upload_file_to_gcs(file_path: str, bucket_name: str, suffix: str) -> None:
"""Upload file to Google Storage Bucket.
:param file_path: File where the data is stored
:type file_path: str
:param bucket_name: Bucket name in GCP storage
:type bucket_name: str
:param suffix: Path inside the bucket
:type suffix: str
"""
client = storage.Client()
bucket = client.get_bucket(bucket_name)
Expand All @@ -23,9 +20,7 @@ def fetch_dataframe_from_gcs(bucket_name: str, suffix: str) -> pd.DataFrame:
:param bucket_name: Bucket name in GCP storage
:type bucket_name: str
:param suffix: Path inside the bucket. Should lead to a parquet file.
:type suffix: str
:return: Dataframe
"""
client = storage.Client()
Expand Down
3 changes: 2 additions & 1 deletion robotoff/batch/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def extract_from_dataset(
:type query_file_path: Path
:param output_file_path: Path to save the extracted data.
:type output_file_path: str
:param dataset_path: Compressed jsonl database, defaults to settings.JSONL_DATASET_PATH
:param dataset_path: Compressed jsonl database, defaults to
settings.JSONL_DATASET_PATH
:type dataset_path: Path, optional
"""
if not dataset_path.exists():
Expand Down
27 changes: 13 additions & 14 deletions robotoff/batch/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,20 @@ def check_google_credentials() -> None:
raise ValueError("GOOGLE_APPLICATION_CREDENTIALS is not set")
if not os.path.exists(credentials_path):
logger.info(
"No google credentials found at %s. Creating credentials from GOOGLE_CREDENTIALS.",
"No google credentials found at %s. Creating credentials from GOOGLE_CREDENTIALS.",
credentials_path,
)
os.makedirs(os.path.dirname(credentials_path), exist_ok=True)
credentials = json.loads(os.getenv("GOOGLE_CREDENTIALS"))
with open(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"), "w") as f:
credentials = json.loads(os.environ["GOOGLE_CREDENTIALS"])
with open(credentials_path, "w") as f:
json.dump(credentials, f, indent=4)


class GoogleBatchJobConfig(BaseModel):
"""Batch job configuration class."""

# By default, extra fields are just ignored. We raise an error in case of extra fields.
# By default, extra fields are just ignored. We raise an error in case of extra
# fields.
model_config: ConfigDict = {"extra": "forbid"}

job_name: str = Field(
Expand Down Expand Up @@ -118,14 +119,13 @@ def init(
config_path: Path,
env_names: Optional[Iterable[str]] = None,
) -> "GoogleBatchJobConfig":
"""Initialize the class with the configuration file corresponding to the job type.
"""Initialize the class with the configuration file corresponding to the job
type.
:param job_name: Name of the job.
:type job_name: str
:param config_path: Path to the configuration file.
:type config_path: Path
:param env_variables: List of environment variables to add to the job, defaults to None.
:type env_variables: Optional[Iterable[str]], optional
:param env_variables: List of environment variables to add to the job, defaults
to None.
"""
# Batch job name should respect a specific pattern, or returns an error
pattern = "^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$"
Expand All @@ -143,7 +143,7 @@ def init(
if not env_names:
env_variables = {}
else:
env_variables = {var_name: os.getenv(var_name) for var_name in env_names}
env_variables = {var_name: os.environ[var_name] for var_name in env_names}

# Load config file from job_type
with open(config_path, "r") as f:
Expand All @@ -156,12 +156,10 @@ def launch_job(batch_job_config: GoogleBatchJobConfig) -> batch_v1.Job:
Sources:
* https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/batch/create
* https://cloud.google.com/python/docs/reference/batch/latest/google.cloud.batch_v1.types
* https://cloud.google.com/python/docs/reference/batch/latest/google.cloud.batch_v1.types # noqa
:param google_batch_launch_config: Config to run a job on Google Batch.
:type google_batch_launch_config: GoogleBatchLaunchConfig
:param batch_job_config: Config to run a specific job on Google Batch.
:type batch_job_config: BatchJobConfig
:return: Batch job information.
Returns:
Expand Down Expand Up @@ -200,7 +198,8 @@ def launch_job(batch_job_config: GoogleBatchJobConfig) -> batch_v1.Job:
group.task_count = batch_job_config.task_count
group.task_spec = task

# Policies are used to define on what kind of virtual machines the tasks will run on.
# Policies are used to define on what kind of virtual machines the tasks will run
# on.
policy = batch_v1.AllocationPolicy.InstancePolicy()
policy.machine_type = batch_job_config.machine_type
instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
Expand Down
3 changes: 1 addition & 2 deletions robotoff/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,8 +1015,7 @@ def launch_batch_job(
)

get_logger()
job_type = BatchJobType[job_type]
_launch_batch_job(job_type)
_launch_batch_job(BatchJobType[job_type])


def main() -> None:
Expand Down
15 changes: 9 additions & 6 deletions robotoff/insights/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1509,13 +1509,16 @@ def is_conflicting_insight(
def _keep_prediction(
cls, prediction: Prediction, product: Optional[Product]
) -> bool:
conditions = [
return (
# Spellcheck didn't correct
prediction.data["original"] != prediction.data["correction"],
# Modification of the original ingredients between two dataset dumps (24-hour period)
product is None or prediction.data["original"] != product.ingredients_text,
]
return all(conditions)
prediction.data["original"] != prediction.data["correction"]
# Modification of the original ingredients between two dataset dumps
# (24-hour period)
and (
product is None
or prediction.data["original"] != product.ingredients_text
)
)


class PackagingElementTaxonomyException(Exception):
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
],
)
def test_batch_job_config_file(inputs):
"Test indirectly the batch job config file by validating with the Pydantic class model."
"""Test indirectly the batch job config file by validating with the Pydantic class
model."""
job_name, config_path, env_names = inputs
GoogleBatchJobConfig.init(
job_name=job_name,
Expand Down

0 comments on commit 0d9d34a

Please sign in to comment.