update: OpenAIJudge
soumik12345 committed Sep 5, 2024
1 parent d6a4e27 commit 006dfa3
Showing 4 changed files with 76 additions and 8 deletions.
14 changes: 8 additions & 6 deletions examples/multimodal_llm_eval/evaluate_mllm_metric_complex.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple
+from typing import Optional
 
 import fire
 import wandb
@@ -16,23 +16,25 @@ def main(
     dataset_limit: Optional[int] = None,
     diffusion_model_address: str = "stabilityai/stable-diffusion-2-1",
     diffusion_model_enable_cpu_offfload: bool = False,
-    image_size: Tuple[int, int] = (512, 512),
+    image_height: int = 1024,
+    image_width: int = 1024,
 ):
     wandb.init(project=project, entity=entity, job_type="evaluation")
-    weave.init(project_name=project)
+    weave.init(project_name=f"{entity}/{project}")
 
     dataset = weave.ref(dataset_ref).get()
     dataset = dataset.rows[:dataset_limit] if dataset_limit else dataset
 
     diffusion_model = BaseDiffusionModel(
         diffusion_model_name_or_path=diffusion_model_address,
         enable_cpu_offfload=diffusion_model_enable_cpu_offfload,
-        image_height=image_size[0],
-        image_width=image_size[1],
+        image_height=image_height,
+        image_width=image_width,
     )
     diffusion_model._pipeline.set_progress_bar_config(disable=True)
     evaluation_pipeline = EvaluationPipeline(model=diffusion_model)
 
-    judge = OpenAIJudge(prompt_property=PromptCategory.complex)
+    judge = OpenAIJudge(prompt_property=PromptCategory.action)
     metric = MultiModalLLMEvaluationMetric(judge=judge)
     evaluation_pipeline.add_metric(metric)
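For reference, a minimal sketch of how the updated entry point might be called after this change, with image_height and image_width passed separately instead of the old image_size tuple. The module path, the project/entity/dataset_ref parameters, and all values are assumptions inferred from the visible hunk, not part of the commit; since the script imports fire, the same arguments are presumably also available as command-line flags.

# Hypothetical usage sketch -- the import path and argument values are assumptions;
# only image_height/image_width reflect the parameters introduced in this diff.
from examples.multimodal_llm_eval.evaluate_mllm_metric_complex import main

main(
    project="hemm-eval",                 # assumed W&B project name
    entity="my-team",                    # assumed W&B entity
    dataset_ref="my-eval-dataset:v0",    # assumed Weave dataset reference
    dataset_limit=10,
    image_height=1024,
    image_width=1024,
)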
2 changes: 2 additions & 0 deletions hemm/metrics/vqa/judges/mmllm_judges/openai_judge.py
@@ -1,4 +1,5 @@
 import os
+import subprocess
 from typing import List
 
 import instructor
@@ -65,6 +66,7 @@ def __init__(
             max_retries=max_retries,
             seed=seed,
         )
+        subprocess.run(["spacy", "download", "en_core_web_sm"])
         self._nlp_pipeline = spacy.load(self.prompt_pipeline)
         self._openai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
         self._instructor_openai_client = instructor.from_openai(
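The new subprocess.run call fetches the en_core_web_sm model on every OpenAIJudge construction. A minimal sketch of an alternative, assuming one wanted to skip the download when the model is already installed (ensure_spacy_model is a hypothetical helper, not part of this commit):

# Hypothetical helper -- not part of the commit; downloads the spaCy model only
# when loading it fails because it is not installed yet.
import subprocess

import spacy


def ensure_spacy_model(model_name: str = "en_core_web_sm"):
    try:
        return spacy.load(model_name)
    except OSError:
        # Model missing: download it once via the spaCy CLI, then load again.
        subprocess.run(["python", "-m", "spacy", "download", model_name], check=True)
        return spacy.load(model_name)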
66 changes: 64 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
@@ -32,6 +32,7 @@ spacy = "^3.7.5"
 instructor = "^1.3.4"
 torchmetrics = { extras = ["multimodal"], version = "^1.4.1" }
 mkdocstrings = {version = "^0.25.2", extras = ["python"]}
+sentencepiece = "^0.2.0"
 
 [tool.poetry.extras]
 core = [
@@ -48,6 +49,7 @@ core = [
     "spacy",
     "instructor",
     "torchmetrics",
+    "sentencepiece",
 ]
 docs = [
     "mkdocs",
