signature layer name is configurable; record top and matching scores in testing results
inaturalist · Jan 3, 2025 · dc3324f · dc3324f
1 parent 1fd6442
commit dc3324f
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 16 deletions.
diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py
@@ -138,7 +138,11 @@ def setup_synonym_taxonomy(self):
         self.taxonomy = synonym_taxonomy
 
     def setup_vision_model(self):
-        self.vision_inferrer = VisionInferrer(self.config["vision_model_path"])
+        self.vision_inferrer = VisionInferrer(
+            self.config["vision_model_path"],
+            # optional key; None means no signature model gets built
+            self.config.get("vision_model_signature_layer")
+        )
 
     def setup_elevation_dataframe(self):
         self.geo_elevation_cells = None
@@ -653,7 +657,10 @@ async def embedding_for_photo(self, url, session):
 
     def signature_for_image(self, image_path):
         image = InatInferrer.prepare_image_for_inference(image_path)
-        return self.vision_inferrer.signature_for_image(image).tolist()
+        signature = self.vision_inferrer.signature_for_image(image)
+        if signature is None:  # inferrer has no signature model configured
+            return
+        return signature.tolist()
 
     async def download_photo_async(self, url, session):
         checksum = hashlib.md5(url.encode()).hexdigest()

diff --git a/lib/vision_inferrer.py b/lib/vision_inferrer.py
@@ -3,8 +3,9 @@
 
 class VisionInferrer:
 
-    def __init__(self, model_path):
+    def __init__(self, model_path, signature_layer_name=None):
         self.model_path = model_path
+        self.signature_layer_name = signature_layer_name
         self.prepare_tf_model()
 
     # initialize the TF model given the configured path
@@ -16,16 +17,19 @@ def prepare_tf_model(self):
             assert device.device_type != "GPU"
 
         self.vision_model = tf.keras.models.load_model(self.model_path, compile=False)
-        self.signature_model = tf.keras.Model(
-            inputs=self.vision_model.inputs,
-            outputs=self.vision_model.get_layer("global_average_pooling2d_5").output
-        )
-        self.signature_model.compile()
+        if self.signature_layer_name is not None:
+            self.signature_model = tf.keras.Model(
+                inputs=self.vision_model.inputs,
+                outputs=self.vision_model.get_layer(self.signature_layer_name).output
+            )
+            self.signature_model.compile()
 
     # given an image object (usually coming from prepare_image_for_inference),
     # calculate vision results for the image
     def process_image(self, image):
         return self.vision_model(tf.convert_to_tensor(image), training=False)[0]
 
     def signature_for_image(self, image):
+        if not hasattr(self, "signature_model"):  # only set when a signature layer name was given
+            return
         return self.signature_model(tf.convert_to_tensor(image), training=False)[0].numpy()
diff --git a/lib/vision_testing.py b/lib/vision_testing.py
@@ -377,17 +377,24 @@ def summarize_result_subset(
         self, inferrer, observation, inferrer_results, summary_index, cutoff=False
     ):
         working_results = inferrer_results["scores"]
+        score_column = "vision_score" if summary_index == "vision" else "combined_score"
+        values = working_results.head(1)[score_column].values
+        if len(values) == 0:
+            top_score = 0
+        else:
+            top_score = values[0]
         if cutoff:
-            score_column = "vision_score" if summary_index == "vision" else "combined_score"
-            values = working_results.head(1)[score_column].values
-            if len(values) == 0:
-                top_score = 0
-            else:
-                top_score = values[0]
             working_results = working_results.query(
                 f"{score_column} > {top_score * 0.001}"
             ).head(10)
 
+        normalized_score_column = f"normalized_{score_column}"
+        normalized_values = working_results.head(1)[normalized_score_column].values
+        if len(normalized_values) == 0:  # the cutoff filter can leave no rows
+            top_normalized_score = 0
+        else:
+            top_normalized_score = normalized_values[0]
+
         summary = {}
         common_ancestor = inferrer_results["common_ancestor"]
         if common_ancestor["taxon"] is None:
@@ -426,6 +433,9 @@ def summarize_result_subset(
             2 * summary["precision"]
         ) / sum_of_precision_and_recall
 
+        summary["top_score"] = top_normalized_score
+        summary["matching_score"] = self.matching_score(observation, working_results, normalized_score_column)
+
         return summary
 
     def matching_index(self, observation, results):
@@ -434,6 +444,16 @@ def matching_index(self, observation, results):
         ].tolist()
         return matching_indices[0] if len(matching_indices) > 0 else None
 
+    def matching_score(self, observation, results, score_column):
+        matches = results.query(
+            f"taxon_id == {observation.taxon_id}"  # rows for the observation's taxon
+        )
+        values = matches.head(1)[score_column].values
+        if len(values) == 0:
+            return 0  # no row in results matches the observation's taxon
+        else:
+            return values[0]
+
     async def download_photo_async(self, photo_url):
         checksum = hashlib.md5(photo_url.encode()).hexdigest()
         cache_path = os.path.join(self.upload_folder, "obs-" + checksum) + ".jpg"

diff --git a/tests/test_vision_inferrer.py b/tests/test_vision_inferrer.py
@@ -8,7 +8,8 @@ def test_initialization(self, mocker):
         mocker.patch("tensorflow.keras.models.load_model", return_value=MagicMock())
         mocker.patch("tensorflow.keras.Model", return_value=MagicMock())
         model_path = "model_path"
-        inferrer = VisionInferrer(model_path)
+        signature_layer = "signature_layer"
+        inferrer = VisionInferrer(model_path, signature_layer)
         assert inferrer.model_path == model_path
         tf.keras.models.load_model.assert_called_once_with(
             model_path,
@@ -19,7 +20,8 @@ def test_process_image(self, mocker):
         mocker.patch("tensorflow.keras.models.load_model", return_value=MagicMock())
         mocker.patch("tensorflow.keras.Model", return_value=MagicMock())
         model_path = "model_path"
-        inferrer = VisionInferrer(model_path)
+        signature_layer = "signature_layer"
+        inferrer = VisionInferrer(model_path, signature_layer)
         theimage = "theimage"
         inferrer.process_image(theimage)
         inferrer.vision_model.assert_called_once_with(