openvinotoolkit · samet-akcay · Jan 7, 2025 · Oct 29, 2024 · Oct 29, 2024 · Nov 1, 2024
@@ -109,6 +109,13 @@ Anomaly Detection via Reverse Distillation from One-Class Embedding.
 Student-Teacher Feature Pyramid Matching for Unsupervised Anomaly Detection
 :::
 
+:::{grid-item-card} {material-regular}`model_training;1.5em` SuperSimpleNet
+:link: ./supersimplenet
+:link-type: doc
+
+SuperSimpleNet: Unifying Unsupervised and Supervised Learning for Fast and Reliable Surface Defect Detection
+:::
+
 :::{grid-item-card} {material-regular}`model_training;1.5em` U-Flow
 :link: ./uflow
 :link-type: doc

@@ -0,0 +1,26 @@
+# SuperSimpleNet
+
+## Architecture
+
+```{eval-rst}
+.. image:: ../../../../../images/supersimplenet/architecture.png
+    :alt: SuperSimpleNet Architecture
+```
+
+```{eval-rst}
+.. automodule:: anomalib.models.image.supersimplenet.lightning_model
+   :members:
+   :show-inheritance:
+```
+
+```{eval-rst}
+.. automodule:: anomalib.models.image.supersimplenet.torch_model
+   :members:
+   :show-inheritance:
+```
+
+```{eval-rst}
+.. automodule:: anomalib.models.image.supersimplenet.anomaly_generator
+   :members:
+   :show-inheritance:
+```
@@ -33,6 +33,7 @@
     - PatchCore (:class:`anomalib.models.image.Patchcore`)
     - Reverse Distillation (:class:`anomalib.models.image.ReverseDistillation`)
     - STFPM (:class:`anomalib.models.image.Stfpm`)
+    - SuperSimpleNet (:class:`anomalib.models.image.Supersimplenet`)
     - UFlow (:class:`anomalib.models.image.Uflow`)
     - VLM-AD (:class:`anomalib.models.image.VlmAd`)
     - WinCLIP (:class:`anomalib.models.image.WinClip`)
@@ -69,6 +70,7 @@
     Patchcore,
     ReverseDistillation,
     Stfpm,
+    Supersimplenet,
     Uflow,
     VlmAd,
     WinClip,
@@ -96,6 +98,7 @@ class UnknownModelError(ModuleNotFoundError):
     "Patchcore",
     "ReverseDistillation",
     "Stfpm",
+    "Supersimplenet",
     "Uflow",
     "VlmAd",
     "WinClip",

@@ -186,7 +186,11 @@ def initialize_feature_extractor(
             backbone_class = backbone.class_path
             backbone_model = backbone_class(**backbone.init_args)
 
-        if isinstance(weights, WeightsEnum):  # torchvision models
+        if isinstance(weights, WeightsEnum) or weights in {
+            "IMAGENET1K_V1",
+            "IMAGENET1K_V2",
+            "DEFAULT",
+        }:  # torchvision models
             feature_extractor = create_feature_extractor(
                 model=backbone_model,
                 return_nodes=return_nodes,

@@ -28,6 +28,7 @@
     - :class:`Patchcore`: Patch Core
     - :class:`ReverseDistillation`: Reverse Knowledge Distillation
     - :class:`Stfpm`: Student-Teacher Feature Pyramid Matching
+    - :class:`Supersimplenet`: SuperSimpleNet
     - :class:`Uflow`: Unsupervised Flow
     - :class:`VlmAd`: Vision Language Model Anomaly Detection
     - :class:`WinClip`: Zero-/Few-Shot CLIP-based Detection
@@ -51,6 +52,7 @@
 from .patchcore import Patchcore
 from .reverse_distillation import ReverseDistillation
 from .stfpm import Stfpm
+from .supersimplenet import Supersimplenet
 from .uflow import Uflow
 from .vlm_ad import VlmAd
 from .winclip import WinClip
@@ -71,6 +73,7 @@
     "Patchcore",
     "ReverseDistillation",
     "Stfpm",
+    "Supersimplenet",
     "Uflow",
     "VlmAd",
     "WinClip",

@@ -0,0 +1,29 @@
+Copyright (c) 2024 Intel Corporation
+SPDX-License-Identifier: Apache-2.0
+
+Some files in this folder are based on the original SuperSimpleNet implementation by Blaž Rolih
+
+Original license:
+-----------------
+
+    MIT License
+
+    Copyright (c) 2024 Blaž Rolih
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
@@ -0,0 +1,57 @@
+# SuperSimpleNet: Unifying Unsupervised and Supervised Learning for Fast and Reliable Surface Defect Detection
+
+This is an implementation of the [SuperSimpleNet](https://arxiv.org/pdf/2408.03143) paper, based on the [official code](https://github.com/blaz-r/SuperSimpleNet).
+
+Model Type: Segmentation
+
+## Description
+
+**SuperSimpleNet** is a simple yet strong discriminative defect / anomaly detection model evolved from the SimpleNet architecture. It consists of four components:
+a feature extractor with upscaling, a feature adaptor, a feature-level synthetic anomaly generation module, and
+a segmentation-detection module.
+
+A ResNet-like feature extractor first extracts features, which are then upscaled and
+average-pooled to capture neighboring context. Features are further refined for the anomaly detection task in the adaptor module.
+During training, synthetic anomalies are generated at the feature level by adding Gaussian noise to regions defined by the
+binary Perlin noise mask. The perturbed features are then fed into the segmentation-detection
+module, which produces the anomaly map and the anomaly score. During inference, anomaly generation is skipped, and the model
+directly predicts the anomaly map and score. The predicted anomaly map is upscaled to match the input image size
+and refined with a Gaussian filter.
+
+This implementation supports both unsupervised and supervised settings, but Anomalib currently supports only unsupervised learning.
 samples.loc[(samples.label == DirType.NORMAL), "split"] = Split.TRAIN 
 samples.loc[(samples.label == DirType.ABNORMAL) | (samples.label == DirType.NORMAL_TEST), "split"] = Split.TEST 
         normal_dir (str | Path | Sequence): Path to the directory containing normal images. 
         root (str | Path | None): Root folder of the dataset. 
             Defaults to ``None``. 
         abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images. 
             Defaults to ``None``. 
         normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing 
             normal images for the test dataset. 
             Defaults to ``None``. 
 samples.loc[(samples.label == DirType.NORMAL), "split"] = Split.TRAIN 
 samples.loc[(samples.label == DirType.ABNORMAL) | (samples.label == DirType.NORMAL_TEST), "split"] = Split.TEST 
         normal_dir (str | Path | Sequence): Path to the directory containing normal images. 
         root (str | Path | None): Root folder of the dataset. 
             Defaults to ``None``. 
         abnormal_dir (str | Path | Sequence | None, optional): Path to the directory containing abnormal images. 
             Defaults to ``None``. 
         normal_test_dir (str | Path | Sequence | None, optional): Path to the directory containing 
             normal images for the test dataset. 
             Defaults to ``None``. 
+
+## Architecture
+
+![SuperSimpleNet architecture](/docs/source/images/supersimplenet/architecture.png "SuperSimpleNet architecture")
+
+## Usage
+
+`anomalib train --model Supersimplenet --data MVTec --data.category <category>`
+
+> It is recommended to train the model for 300 epochs with a batch size of 32 to achieve stable training with random anomaly generation. Training with lower parameter values will still work, but might not yield optimal results.
+>
+> For supervised learning, refer to the [official code](https://github.com/blaz-r/SuperSimpleNet).
+
+## MVTec AD results
+
+The following results were obtained using this Anomalib implementation trained for 300 epochs with seed 0, default params, and batch size 32.
+| | **Image AUROC** | **Pixel AUPRO** |
+| ----------- | :-------------: | :-------------: |
+| Bottle | 1.000 | 0.903 |
+| Cable | 0.981 | 0.901 |
+| Capsule | 0.989 | 0.931 |
+| Carpet | 0.985 | 0.929 |
+| Grid | 0.994 | 0.930 |
+| Hazelnut | 0.994 | 0.943 |
+| Leather | 1.000 | 0.970 |
+| Metal_nut | 0.995 | 0.920 |
+| Pill | 0.962 | 0.936 |
+| Screw | 0.912 | 0.947 |
+| Tile | 0.994 | 0.854 |
+| Toothbrush | 0.908 | 0.860 |
+| Transistor | 1.000 | 0.907 |
+| Wood | 0.987 | 0.858 |
+| Zipper | 0.995 | 0.928 |
+| Average | 0.980 | 0.914 |
+
+For other results on VisA, SensumSODF, and KSDD2, refer to the [paper](https://arxiv.org/pdf/2408.03143).
@@ -0,0 +1,8 @@
+"""SuperSimpleNet model."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from .lightning_model import Supersimplenet
+
+__all__ = ["Supersimplenet"]
@@ -0,0 +1,142 @@
+"""Anomaly generator for the SuperSimpleNet model implementation."""
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import torch
+import torch.nn.functional as F  # noqa: N812
+from torch import nn
+
+from anomalib.data.utils.generators import generate_perlin_noise
+
+
class SSNAnomalyGenerator(nn.Module):
    """Feature-level synthetic anomaly generator of the SuperSimpleNet model.

    Synthetic anomalies are created by adding Gaussian noise to the input
    features inside regions selected by a thresholded (binarized) Perlin
    noise mask. The ground-truth masks and labels are updated accordingly.

    Args:
        noise_mean (float): Mean of the Gaussian noise added to the features.
        noise_std (float): Standard deviation of the Gaussian noise.
        threshold (float): Perlin noise values strictly above this threshold
            mark a location as anomalous in the binarized mask.
    """

    def __init__(
        self,
        noise_mean: float,
        noise_std: float,
        threshold: float,
    ) -> None:
        super().__init__()

        self.noise_mean = noise_mean
        self.noise_std = noise_std

        self.threshold = threshold

    @staticmethod
    def next_power_2(num: int) -> int:
        """Get the smallest power of 2 that is greater than or equal to ``num``.

        Args:
            num (int): value of interest

        Returns:
            next power of 2 value for given number. Values of 1 or less
            (including zero and negatives) yield 1, i.e. ``2 ** 0``.
        """
        # (num - 1).bit_length() is only meaningful for num >= 1; without this
        # guard 0 would map to 2 and negative numbers to arbitrary powers.
        if num <= 1:
            return 1
        return 1 << (num - 1).bit_length()

    def generate_perlin(self, batches: int, height: int, width: int) -> torch.Tensor:
        """Generate 2d perlin noise masks with dims [b, 1, h, w].

        Each mask is binarized with ``self.threshold`` and, with a probability
        of roughly 50%, replaced by an all-zero mask (no synthetic anomaly).

        Args:
            batches (int): number of batches (different masks)
            height (int): height of features
            width (int): width of features

        Returns:
            tensor with b perlin binarized masks. For ``batches <= 0`` an
            empty tensor of shape [0, 1, height, width] is returned.
        """
        # torch.cat cannot concatenate an empty list, so handle the empty batch explicitly
        if batches <= 0:
            return torch.zeros((0, 1, height, width), dtype=torch.long)

        # keep power of 2 here for reproduction purpose, although the perlin
        # generator supports power of 2 internally; these sizes do not change
        # between iterations, so compute them once
        perlin_height = self.next_power_2(height)
        perlin_width = self.next_power_2(width)

        masks = []
        for _ in range(batches):
            perlin_noise = generate_perlin_noise(height=perlin_height, width=perlin_width)

            # original is power of 2 scale, so fit to our size
            perlin_noise = F.interpolate(
                perlin_noise.reshape(1, 1, perlin_height, perlin_width),
                size=(height, width),
                mode="bilinear",
            )
            # binarize
            perlin_thr = torch.where(perlin_noise > self.threshold, 1, 0)

            # 50% of anomaly: the noise is still generated above so that the
            # sequence of random draws stays the same for every sample
            if torch.rand(1).item() > 0.5:
                perlin_thr = torch.zeros_like(perlin_thr)

            masks.append(perlin_thr)
        return torch.cat(masks)

    def forward(
        self,
        features: torch.Tensor,
        mask: torch.Tensor,
        labels: torch.Tensor,
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Generate anomaly on features using thresholded perlin noise and Gaussian noise.

        The inputs are duplicated along the batch dimension, so every returned
        tensor has twice the batch size of the corresponding input.
        Also update GT masks and labels with new anomaly information.

        Args:
            features: input features of shape [B, C, H, W].
            mask: GT masks (1 marks an anomalous location); presumably of shape
                [B, 1, H, W] so that it lines up with the noise mask - confirm
                against the caller.
            labels: GT labels; presumably of shape [B] - confirm against the caller.

        Returns:
            perturbed features, updated GT masks and labels.
        """
        b, _, h, w = features.shape
        doubled = b * 2

        # duplicate
        features = torch.cat((features, features))
        mask = torch.cat((mask, mask))
        labels = torch.cat((labels, labels))

        noise = torch.normal(
            mean=self.noise_mean,
            std=self.noise_std,
            size=features.shape,
            device=features.device,
            requires_grad=False,
        )

        # mask indicating which regions will have noise applied
        # [B * 2, 1, H, W] initial all masked as anomalous
        noise_mask = torch.ones(
            doubled,
            1,
            h,
            w,
            device=features.device,
            requires_grad=False,
        )

        # no overlap: don't apply to already anomalous regions (mask=1 -> bad)
        noise_mask = noise_mask * (1 - mask)

        # shape of perlin mask is [B * 2, 1, H, W]
        perlin_mask = self.generate_perlin(doubled, h, w).to(features.device)
        # only apply where perlin mask is 1
        noise_mask = noise_mask * perlin_mask

        # update gt mask
        mask = mask + noise_mask
        # binarize
        mask = torch.where(mask > 0, torch.ones_like(mask), torch.zeros_like(mask))

        # make new labels. 1 if any part of mask is 1, 0 otherwise
        # (flatten instead of reshape(..., -1): the latter is ambiguous and
        # raises for tensors with zero elements, i.e. an empty batch)
        new_anomalous = noise_mask.flatten(start_dim=1).any(dim=1).type(torch.float32)
        labels = labels + new_anomalous
        # binarize
        labels = torch.where(labels > 0, torch.ones_like(labels), torch.zeros_like(labels))

        # apply masked noise
        perturbed = features + noise * noise_mask

        return perturbed, mask, labels