Skip to content

Commit

Permalink
fix: fix mypy and flake8 issues
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Sep 10, 2024
1 parent 019f6c9 commit 0d9d34a
Show file tree
Hide file tree
Showing 12 changed files with 987 additions and 1,010 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ max-line-length = 88
exclude = .git,__pycache__,build,dist,*_pb2.py,.venv
per-file-ignores =
robotoff/cli/main.py:B008
max-doc-length = 79
max-doc-length = 88
13 changes: 6 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@ DOCKER_COMPOSE_TEST=COMPOSE_PROJECT_NAME=robotoff_test COMMON_NET_NAME=po_test d
ML_OBJECT_DETECTION_MODELS := tf-universal-logo-detector tf-nutrition-table tf-nutriscore

# Spellcheck
IMAGE_NAME = spellcheck-batch-vllm
TAG = latest
GCLOUD_LOCATION = europe-west9-docker.pkg.dev
REGISTRY = ${GCLOUD_LOCATION}/robotoff/gcf-artifacts
SPELLCHECK_IMAGE_NAME = spellcheck-batch-vllm
SPELLCHECK_TAG = latest
SPELLCHECK_REGISTRY = europe-west9-docker.pkg.dev/robotoff/gcf-artifacts

.DEFAULT_GOAL := dev
# avoid target corresponding to file names, to depends on them
Expand Down Expand Up @@ -300,12 +299,12 @@ create-po-default-network:

# Spellcheck
build-spellcheck:
docker build -f batch/spellcheck/Dockerfile -t $(IMAGE_NAME):$(TAG) batch/spellcheck
docker build -f batch/spellcheck/Dockerfile -t $(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG) batch/spellcheck

# Push the image to the registry
push-spellcheck:
docker tag $(IMAGE_NAME):$(TAG) $(REGISTRY)/$(IMAGE_NAME):$(TAG)
docker push $(REGISTRY)/$(IMAGE_NAME):$(TAG)
docker tag $(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG) $(SPELLCHECK_REGISTRY)/$(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG)
docker push $(SPELLCHECK_REGISTRY)/$(SPELLCHECK_IMAGE_NAME):$(SPELLCHECK_TAG)

# Build and push in one command
deploy-spellcheck:
Expand Down
1 change: 1 addition & 0 deletions docker/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ x-robotoff-dev: &robotoff-dev
- ./pyproject.toml:/opt/robotoff/pyproject.toml
- ./poetry.toml:/opt/robotoff/poetry.toml
- ./poetry.lock:/opt/robotoff/poetry.lock
- ./.flake8:/opt/robotoff/.flake8
# make tests available
- ./tests:/opt/robotoff/tests
- ./.cov:/opt/robotoff/.cov
Expand Down
1,922 changes: 951 additions & 971 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ rq-dashboard = "~0.6.1"

[tool.poetry.group.dev.dependencies]
types-pytz = "^2024.1.0.20240417"
types-pyyaml = "^6.0.12.20240808"

[tool.poetry.scripts]
robotoff-cli = 'robotoff.cli.main:main'
2 changes: 0 additions & 2 deletions robotoff/app/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ def validate_token(token: str, ref_token_name: str) -> bool:
"""Validate token.
:param token: Authentication token
:type token: str
:param api_token_name: Validation token, stored in environment variables.
:type api_token_name: str
"""
api_token = os.getenv(ref_token_name.upper())
if not api_token:
Expand Down
5 changes: 0 additions & 5 deletions robotoff/batch/buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,8 @@ def upload_file_to_gcs(file_path: str, bucket_name: str, suffix: str) -> None:
"""Upload file to Google Storage Bucket.
:param file_path: File where the data is stored
:type file_path: str
:param bucket_name: Bucket name in GCP storage
:type bucket_name: str
:param suffix: Path inside the bucket
:type suffix: str
"""
client = storage.Client()
bucket = client.get_bucket(bucket_name)
Expand All @@ -23,9 +20,7 @@ def fetch_dataframe_from_gcs(bucket_name: str, suffix: str) -> pd.DataFrame:
:param bucket_name: Bucket name in GCP storage
:type bucket_name: str
:param suffix: Path inside the bucket. Should lead to a parquet file.
:type suffix: str
:return: Dataframe
"""
client = storage.Client()
Expand Down
3 changes: 2 additions & 1 deletion robotoff/batch/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def extract_from_dataset(
:type query_file_path: Path
:param output_file_path: Path to save the extracted data.
:type output_file_path: str
:param dataset_path: Compressed jsonl database, defaults to settings.JSONL_DATASET_PATH
:param dataset_path: Compressed jsonl database, defaults to
settings.JSONL_DATASET_PATH
:type dataset_path: Path, optional
"""
if not dataset_path.exists():
Expand Down
27 changes: 13 additions & 14 deletions robotoff/batch/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,20 @@ def check_google_credentials() -> None:
raise ValueError("GOOGLE_APPLICATION_CREDENTIALS is not set")
if not os.path.exists(credentials_path):
logger.info(
"No google credentials found at %s. Creating credentials from GOOGLE_CREDENTIALS.",
"No google credentials found at %s. Creating credentials from GOOGLE_CREDENTIALS.",
credentials_path,
)
os.makedirs(os.path.dirname(credentials_path), exist_ok=True)
credentials = json.loads(os.getenv("GOOGLE_CREDENTIALS"))
with open(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"), "w") as f:
credentials = json.loads(os.environ["GOOGLE_CREDENTIALS"])
with open(credentials_path, "w") as f:
json.dump(credentials, f, indent=4)


class GoogleBatchJobConfig(BaseModel):
"""Batch job configuration class."""

# By default, extra fields are just ignored. We raise an error in case of extra fields.
# By default, extra fields are just ignored. We raise an error in case of extra
# fields.
model_config: ConfigDict = {"extra": "forbid"}

job_name: str = Field(
Expand Down Expand Up @@ -118,14 +119,13 @@ def init(
config_path: Path,
env_names: Optional[Iterable[str]] = None,
) -> "GoogleBatchJobConfig":
"""Initialize the class with the configuration file corresponding to the job type.
"""Initialize the class with the configuration file corresponding to the job
type.
:param job_name: Name of the job.
:type job_name: str
:param config_path: Path to the configuration file.
:type config_path: Path
:param env_variables: List of environment variables to add to the job, defaults to None.
:type env_variables: Optional[Iterable[str]], optional
:param env_variables: List of environment variables to add to the job, defaults
to None.
"""
# Batch job name should respect a specific pattern, or returns an error
pattern = "^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$"
Expand All @@ -143,7 +143,7 @@ def init(
if not env_names:
env_variables = {}
else:
env_variables = {var_name: os.getenv(var_name) for var_name in env_names}
env_variables = {var_name: os.environ[var_name] for var_name in env_names}

# Load config file from job_type
with open(config_path, "r") as f:
Expand All @@ -156,12 +156,10 @@ def launch_job(batch_job_config: GoogleBatchJobConfig) -> batch_v1.Job:
Sources:
* https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/batch/create
* https://cloud.google.com/python/docs/reference/batch/latest/google.cloud.batch_v1.types
* https://cloud.google.com/python/docs/reference/batch/latest/google.cloud.batch_v1.types # noqa
:param google_batch_launch_config: Config to run a job on Google Batch.
:type google_batch_launch_config: GoogleBatchLaunchConfig
:param batch_job_config: Config to run a specific job on Google Batch.
:type batch_job_config: BatchJobConfig
:return: Batch job information.
Returns:
Expand Down Expand Up @@ -200,7 +198,8 @@ def launch_job(batch_job_config: GoogleBatchJobConfig) -> batch_v1.Job:
group.task_count = batch_job_config.task_count
group.task_spec = task

# Policies are used to define on what kind of virtual machines the tasks will run on.
# Policies are used to define on what kind of virtual machines the tasks will run
# on.
policy = batch_v1.AllocationPolicy.InstancePolicy()
policy.machine_type = batch_job_config.machine_type
instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
Expand Down
3 changes: 1 addition & 2 deletions robotoff/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,8 +1015,7 @@ def launch_batch_job(
)

get_logger()
job_type = BatchJobType[job_type]
_launch_batch_job(job_type)
_launch_batch_job(BatchJobType[job_type])


def main() -> None:
Expand Down
15 changes: 9 additions & 6 deletions robotoff/insights/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1509,13 +1509,16 @@ def is_conflicting_insight(
def _keep_prediction(
cls, prediction: Prediction, product: Optional[Product]
) -> bool:
conditions = [
return (
# Spellcheck didn't correct
prediction.data["original"] != prediction.data["correction"],
# Modification of the original ingredients between two dataset dumps (24-hour period)
product is None or prediction.data["original"] != product.ingredients_text,
]
return all(conditions)
prediction.data["original"] != prediction.data["correction"]
# Modification of the original ingredients between two dataset dumps
# (24-hour period)
and (
product is None
or prediction.data["original"] != product.ingredients_text
)
)


class PackagingElementTaxonomyException(Exception):
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
],
)
def test_batch_job_config_file(inputs):
"Test indirectly the batch job config file by validating with the Pydantic class model."
"""Test indirectly the batch job config file by validating with the Pydantic class
model."""
job_name, config_path, env_names = inputs
GoogleBatchJobConfig.init(
job_name=job_name,
Expand Down

0 comments on commit 0d9d34a

Please sign in to comment.