From 9cbdf858c78d15e49767fa0b6ff0436a8aa50efe Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 14:13:24 +0200 Subject: [PATCH 1/8] implement batch extraction with with statement for pytorch --- .../extraction/test_torch_vs_tensorflow.py | 54 +++++++++---------- thingsvision/core/extraction/torch.py | 38 ++++++++++--- 2 files changed, 58 insertions(+), 34 deletions(-) diff --git a/tests/extractor/extraction/test_torch_vs_tensorflow.py b/tests/extractor/extraction/test_torch_vs_tensorflow.py index 41bc732..cc10d35 100644 --- a/tests/extractor/extraction/test_torch_vs_tensorflow.py +++ b/tests/extractor/extraction/test_torch_vs_tensorflow.py @@ -1,11 +1,11 @@ import unittest -import torch import numpy as np +import torch + import tests.helper as helper -from thingsvision.utils.data import DataLoader import thingsvision.core.extraction.helpers as core_helpers - +from thingsvision.utils.data import DataLoader class ExtractionPTvsTFTestCase(unittest.TestCase): @@ -45,8 +45,8 @@ def test_custom_torch_vs_tf_extraction(self): pt_model.backend = pt_backend layer_name = "relu" - expected_features_pt = torch.tensor([[2., 2.], [0., 0.]]) - expected_features_tf = np.array([[2., 2.], [0, 0.]]) + expected_features_pt = torch.tensor([[2.0, 2.0], [0.0, 0.0]]) + expected_features_tf = np.array([[2.0, 2.0], [0, 0.0]]) for i, batch in enumerate(tf_dl): tf_features = tf_model.extract_batch( @@ -54,33 +54,33 @@ def test_custom_torch_vs_tf_extraction(self): module_name=layer_name, flatten_acts=False, ) - np.testing.assert_allclose(tf_features, expected_features_tf[i][None,:]) - - for i, batch in enumerate(pt_dl): - pt_features = pt_model.extract_batch( - batch=batch, - module_name=layer_name, - flatten_acts=False, - output_type="tensor", - ) - np.testing.assert_allclose(pt_features, expected_features_pt[i][None,:]) + expected_features = expected_features_pt[i][None, :] + np.testing.assert_allclose(tf_features, expected_features) + + with pt_model.batch_extraction(layer_name, "tensor") as e: + for i, batch in enumerate(pt_dl): + pt_features = e.extract_batch( + batch=batch, + flatten_acts=False, + ) + expected_features = expected_features_pt[i][None, :] + np.testing.assert_allclose(pt_features, expected_features) layer_name = "relu2" - expected_features = np.array([[4., 4.], [0., 0.]]) + expected_features = np.array([[4.0, 4.0], [0.0, 0.0]]) for i, batch in enumerate(tf_dl): tf_features = tf_model.extract_batch( batch=batch, module_name=layer_name, flatten_acts=False, ) - np.testing.assert_allclose(tf_features, expected_features[i][None,:]) - - for i, batch in enumerate(pt_dl): - pt_features = pt_model.extract_batch( - batch=batch, - module_name=layer_name, - flatten_acts=False, - output_type="ndarray", - ) - np.testing.assert_allclose(pt_features, expected_features[i][None,:]) - + np.testing.assert_allclose(tf_features, expected_features[i][None, :]) + + with pt_model.batch_extraction(layer_name, "ndarray") as e: + for i, batch in enumerate(pt_dl): + pt_features = e.extract_batch( + batch=batch, + flatten_acts=False, + ) + expected_features = expected_features_pt[i][None, :] + np.testing.assert_allclose(pt_features, expected_features) diff --git a/thingsvision/core/extraction/torch.py b/thingsvision/core/extraction/torch.py index 2f4e183..8a938fe 100644 --- a/thingsvision/core/extraction/torch.py +++ b/thingsvision/core/extraction/torch.py @@ -67,24 +67,26 @@ def register_hook(self, module_name: str) -> None: def _unregister_hook(self) -> None: self.hook_handle.remove() + def batch_extraction(self, module_name: str, output_type: str) -> object: + return BatchExtraction( + extractor=self, module_name=module_name, output_type=output_type + ) + def extract_batch( self, batch: TensorType["b", "c", "h", "w"], - module_name: str, flatten_acts: bool, - output_type: str = "tensor", ) -> Union[ TensorType["b", "num_maps", "h_prime", "w_prime"], TensorType["b", "t", "d"], TensorType["b", "p"], TensorType["b", "d"], ]: - self._module_and_output_check(module_name, output_type) - self.register_hook(module_name=module_name) - act = self._extract_batch(batch, module_name, flatten_acts) - if output_type == "ndarray": + act = self._extract_batch( + batch=batch, module_name=self.module_name, flatten_acts=flatten_acts + ) + if self.output_type == "ndarray": act = self._to_numpy(act) - self._unregister_hook() return act @torch.no_grad() @@ -230,3 +232,25 @@ def get_default_transformation( def get_backend(self) -> str: return "pt" + + +class BatchExtraction(object): + + def __init__( + self, extractor: PyTorchExtractor, module_name: str, output_type: str + ) -> None: + self.extractor = extractor + self.module_name = module_name + self.output_type = output_type + + def __enter__(self) -> PyTorchExtractor: + self.extractor._module_and_output_check(self.module_name, self.output_type) + self.extractor.register_hook(self.module_name) + setattr(self.extractor, "module_name", self.module_name) + setattr(self.extractor, "output_type", self.output_type) + return self.extractor + + def __exit__(self, *args): + self.extractor.unregister_hook() + delattr(self.extractor, "module_name") + delattr(self.extractor, "output_type") From d4a992fc081564c2f2b84b5c45451f0575b4bfae Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 14:41:51 +0200 Subject: [PATCH 2/8] fixed typo --- thingsvision/core/extraction/torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thingsvision/core/extraction/torch.py b/thingsvision/core/extraction/torch.py index 8a938fe..b57aceb 100644 --- a/thingsvision/core/extraction/torch.py +++ b/thingsvision/core/extraction/torch.py @@ -251,6 +251,6 @@ def __enter__(self) -> PyTorchExtractor: return self.extractor def __exit__(self, *args): - self.extractor.unregister_hook() + self.extractor._unregister_hook() delattr(self.extractor, "module_name") delattr(self.extractor, "output_type") From 3eb54103fd512841ea0727aa36249b17534b4f84 Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 14:49:08 +0200 Subject: [PATCH 3/8] updated docs --- docs/GettingStarted.md | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md index 393e813..6b25879 100644 --- a/docs/GettingStarted.md +++ b/docs/GettingStarted.md @@ -5,7 +5,8 @@ nav_order: 2 # Getting started ## Setting up your environment -### Working locally. + +### Working locally First, create a new `conda environment` with Python version 3.8, 3.9, or 3.10 e.g. by using `conda`: ```bash @@ -109,6 +110,27 @@ save_features(features, out_path='path/to/features', file_format='npy') # file_f ### Extraction with custom data pipeline and training loop +#### PyTorch + +```python +module_name = 'visual' + +# your custom dataset and dataloader classes come here (for example, a PyTorch data loader) +my_dataset = ... +my_dataloader = ... + +with extractor.batch_extraction(module_name, output_type="tensor") as e: + # your custom training loop comes here + for batch in my_dataloader: + ... # whatever preprocessing you want to add to the batch + feature_batch = e.extract_batch( + batch=batch, + flatten_acts=True, # flatten 2D feature maps from an early convolutional or attention layer + ) + ... # whatever post-processing you want to add to the extracted features +``` + +#### TensorFlow / Keras ```python module_name = 'visual' @@ -124,7 +146,6 @@ for batch in my_dataloader: batch=batch, module_name=module_name, flatten_acts=True, # flatten 2D feature maps from an early convolutional or attention layer - output_type="tensor", # optionally set the output type of the feature matrix ) ... # whatever post-processing you want to add to the extracted features ``` From daf7a0a02bddebdf7eb66d6c10adefc1b34fdfef Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 15:31:21 +0200 Subject: [PATCH 4/8] fixed typo in tests --- tests/extractor/extraction/test_torch_vs_tensorflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/extractor/extraction/test_torch_vs_tensorflow.py b/tests/extractor/extraction/test_torch_vs_tensorflow.py index cc10d35..cad9a5d 100644 --- a/tests/extractor/extraction/test_torch_vs_tensorflow.py +++ b/tests/extractor/extraction/test_torch_vs_tensorflow.py @@ -54,7 +54,7 @@ def test_custom_torch_vs_tf_extraction(self): module_name=layer_name, flatten_acts=False, ) - expected_features = expected_features_pt[i][None, :] + expected_features = expected_features_tf[i][None, :] np.testing.assert_allclose(tf_features, expected_features) with pt_model.batch_extraction(layer_name, "tensor") as e: From 9c56b592d363a85c5661b3f0b029d15a6cb89f89 Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 15:37:09 +0200 Subject: [PATCH 5/8] fixed typo in tests --- tests/extractor/extraction/test_torch_vs_tensorflow.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/extractor/extraction/test_torch_vs_tensorflow.py b/tests/extractor/extraction/test_torch_vs_tensorflow.py index cad9a5d..ee3c0f4 100644 --- a/tests/extractor/extraction/test_torch_vs_tensorflow.py +++ b/tests/extractor/extraction/test_torch_vs_tensorflow.py @@ -45,8 +45,8 @@ def test_custom_torch_vs_tf_extraction(self): pt_model.backend = pt_backend layer_name = "relu" - expected_features_pt = torch.tensor([[2.0, 2.0], [0.0, 0.0]]) expected_features_tf = np.array([[2.0, 2.0], [0, 0.0]]) + expected_features_pt = torch.tensor([[2.0, 2.0], [0.0, 0.0]]) for i, batch in enumerate(tf_dl): tf_features = tf_model.extract_batch( @@ -57,7 +57,7 @@ def test_custom_torch_vs_tf_extraction(self): expected_features = expected_features_tf[i][None, :] np.testing.assert_allclose(tf_features, expected_features) - with pt_model.batch_extraction(layer_name, "tensor") as e: + with pt_model.batch_extraction(layer_name, output_type="tensor") as e: for i, batch in enumerate(pt_dl): pt_features = e.extract_batch( batch=batch, @@ -76,11 +76,10 @@ def test_custom_torch_vs_tf_extraction(self): ) np.testing.assert_allclose(tf_features, expected_features[i][None, :]) - with pt_model.batch_extraction(layer_name, "ndarray") as e: + with pt_model.batch_extraction(layer_name, output_type="ndarray") as e: for i, batch in enumerate(pt_dl): pt_features = e.extract_batch( batch=batch, flatten_acts=False, ) - expected_features = expected_features_pt[i][None, :] - np.testing.assert_allclose(pt_features, expected_features) + np.testing.assert_allclose(pt_features, expected_features[i][None, :]) From 5a74002c3fb9712e6444fdc713c089f893f56a4e Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 16:01:34 +0200 Subject: [PATCH 6/8] updated README and docs --- README.md | 46 ++++++++++++++++++++++++++++++++++++++-- docs/GettingStarted.md | 4 +--- thingsvision/_version.py | 2 +- 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7a04992..de53228 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ Neural networks come from different sources. With `thingsvision`, you can extrac ### :computer: Setting up your environment -#### Working locally. +#### Working locally First, create a new `conda environment` with Python version 3.8, 3.9, or 3.10 e.g. by using `conda`: ```bash @@ -121,7 +121,7 @@ $ pip install dreamsim==0.1.2 See the [docs](https://vicco-group.github.io/thingsvision/AvailableModels.html#dreamsim) for which `DreamSim` models are available in `thingsvision`. -#### Google Colab. +#### Google Colab Alternatively, you can use Google Colab to play around with `thingsvision` by uploading your image data to Google Drive (via directory mounting). You can find the jupyter notebook using `PyTorch` [here](https://colab.research.google.com/github/ViCCo-Group/thingsvision/blob/master/notebooks/pytorch.ipynb) and the `TensorFlow` example [here](https://colab.research.google.com/github/ViCCo-Group/thingsvision/blob/master/notebooks/tensorflow.ipynb).

(back to top)

@@ -207,6 +207,48 @@ features = extractor.extract_features( save_features(features, out_path='path/to/features', file_format='npy') # file_format can be set to "npy", "txt", "mat", "pt", or "hdf5" ``` +#### Feature extraction with custom data pipeline + +##### PyTorch + +```python +module_name = 'visual' + +# your custom dataset and dataloader classes come here (for example, a PyTorch data loader) +my_dataset = ... +my_dataloader = ... + +with extractor.batch_extraction(module_name, output_type="tensor") as e: + for batch in my_dataloader: + ... # whatever preprocessing you want to add to the batch + feature_batch = e.extract_batch( + batch=batch, + flatten_acts=True, # flatten 2D feature maps from an early convolutional or attention layer + ) + ... # whatever post-processing you want to add to the extracted features +``` + +##### TensorFlow / Keras + +```python +module_name = 'visual' + +# your custom dataset and dataloader classes come here (for example, a PyTorch data loader) +my_dataset = ... +my_dataloader = ... + +for batch in my_dataloader: + ... # whatever preprocessing you want to add to the batch + feature_batch = extractor.extract_batch( + batch=batch, + module_name=module_name, + flatten_acts=True, # flatten 2D feature maps from an early convolutional or attention layer + ) + ... # whatever post-processing you want to add to the extracted features +``` + +#### Human alignment + *Human alignment*: If you want to align the extracted features with human object similarity according to the approach introduced in *[Improving neural network representations using human similiarty judgments](https://proceedings.neurips.cc/paper_files/paper/2023/hash/9febda1c8344cc5f2d51713964864e93-Abstract-Conference.html)* you can optionally `align` the extracted features using the following method: ```python diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md index 6b25879..d476a87 100644 --- a/docs/GettingStarted.md +++ b/docs/GettingStarted.md @@ -108,7 +108,7 @@ features = extractor.extract_features( save_features(features, out_path='path/to/features', file_format='npy') # file_format can be set to "npy", "txt", "mat", "pt", or "hdf5" ``` -### Extraction with custom data pipeline and training loop +### Extraction with custom data pipeline #### PyTorch @@ -120,7 +120,6 @@ my_dataset = ... my_dataloader = ... with extractor.batch_extraction(module_name, output_type="tensor") as e: - # your custom training loop comes here for batch in my_dataloader: ... # whatever preprocessing you want to add to the batch feature_batch = e.extract_batch( @@ -139,7 +138,6 @@ module_name = 'visual' my_dataset = ... my_dataloader = ... -# your custom training loop comes here for batch in my_dataloader: ... # whatever preprocessing you want to add to the batch feature_batch = extractor.extract_batch( diff --git a/thingsvision/_version.py b/thingsvision/_version.py index 7a2056f..667b52f 100644 --- a/thingsvision/_version.py +++ b/thingsvision/_version.py @@ -1 +1 @@ -__version__ = "2.5.1" +__version__ = "2.5.2" From 9aa9e6dc0fe4c50efb1f9629b83514595fcfa60d Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 16:04:48 +0200 Subject: [PATCH 7/8] updated README and docs --- README.md | 2 +- docs/GettingStarted.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index de53228..b79143b 100644 --- a/README.md +++ b/README.md @@ -233,7 +233,7 @@ with extractor.batch_extraction(module_name, output_type="tensor") as e: ```python module_name = 'visual' -# your custom dataset and dataloader classes come here (for example, a PyTorch data loader) +# your custom dataset and dataloader classes come here (for example, TFRecords files) my_dataset = ... my_dataloader = ... diff --git a/docs/GettingStarted.md b/docs/GettingStarted.md index d476a87..e36f66e 100644 --- a/docs/GettingStarted.md +++ b/docs/GettingStarted.md @@ -134,7 +134,7 @@ with extractor.batch_extraction(module_name, output_type="tensor") as e: ```python module_name = 'visual' -# your custom dataset and dataloader classes come here (for example, a PyTorch data loader) +# your custom dataset and dataloader classes come here (for example, TFRecords files) my_dataset = ... my_dataloader = ... From 2bf4edb72d6bca29a23cab5cb38aefe9598f4fc2 Mon Sep 17 00:00:00 2001 From: LukasMut Date: Thu, 4 Apr 2024 16:34:37 +0200 Subject: [PATCH 8/8] changed register hook from public to private method --- thingsvision/core/extraction/torch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/thingsvision/core/extraction/torch.py b/thingsvision/core/extraction/torch.py index b57aceb..1da012e 100644 --- a/thingsvision/core/extraction/torch.py +++ b/thingsvision/core/extraction/torch.py @@ -57,7 +57,7 @@ def hook(model, input, output) -> None: return hook - def register_hook(self, module_name: str) -> None: + def _register_hook(self, module_name: str) -> None: """Register a forward hook to store activations.""" for n, m in self.model.named_modules(): if n == module_name: @@ -130,7 +130,7 @@ def extract_features( ): self.model = self.model.to(self.device) self.activations = {} - self.register_hook(module_name=module_name) + self._register_hook(module_name=module_name) features = super().extract_features( batches=batches, module_name=module_name, @@ -245,7 +245,7 @@ def __init__( def __enter__(self) -> PyTorchExtractor: self.extractor._module_and_output_check(self.module_name, self.output_type) - self.extractor.register_hook(self.module_name) + self.extractor._register_hook(self.module_name) setattr(self.extractor, "module_name", self.module_name) setattr(self.extractor, "output_type", self.output_type) return self.extractor