From ff7f8c1a61c0005d6f444cc660e35a3fc38da1b7 Mon Sep 17 00:00:00 2001
From: Anders Sildnes <andsild@posteo.net>
Date: Tue, 25 Feb 2025 10:43:45 -0600
Subject: [PATCH 1/5] Support `cutoff` predictions for AL

This speeds up the active learning (AL) loop.

Not a perfect solution: the UI will now recommend that users predict
"default" for many of the labels. But it is a first step toward making
sure we can handle large slides with millions of annotations.
---
 .../SuperpixelClassification.xml              |   7 +
 .../SuperpixelClassificationBase.py           | 123 +++++++++++++-----
 .../SuperpixelClassificationTorch.py          |   8 +-
 3 files changed, 103 insertions(+), 35 deletions(-)
diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml b/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml
index 38c7b77..4ffc03d 100644
--- a/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml
+++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml
@@ -198,5 +198,12 @@
       <default>4</default>
       <description>The number of worker threads for superpixel and feature generation</description>
     </integer>
+    <integer>
+      <name>cutoff</name>
+      <longflag>cutoff</longflag>
+      <label>Number of annotations per slide</label>
+      <default>500</default>
+      <description>Number of unannotated superpixels to use per slide for features, training and predictions</description>
+    </integer>
   </parameters>
 </executable>
diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
index adc1148..d7aa4d7 100644
--- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
+++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
@@ -332,7 +332,7 @@ def createFeatureListFromPatchAndMaskList(self, patch_list, mask_list, maskvals_
         )
         return feature_list
 
-    def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patchSize, prog):
+    def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patchSize, prog, cutoff):
         import large_image
 
         print('Create feature', fileName)
@@ -349,17 +349,31 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch
             gc.downloadFile(maskItem['largeImage']['fileId'], maskPath)
             tsMask = large_image.open(maskPath)
 
+            num_values = len(elem['values'])
+            labeled_samples = set([i for i, x in enumerate(elem['values']) if x > 0])
+            unlabeled_samples = [i for i, x in enumerate(elem['values']) if x == 0]
+            if num_values - len(labeled_samples) > cutoff:
+                # only select a subset of unlabeled samples, i.e., prune the feature list
+                random.shuffle(unlabeled_samples)
+                unlabeled_samples = unlabeled_samples[:cutoff]
+            indices = list(sorted(list(labeled_samples) + unlabeled_samples))
+
             with h5py.File(filePath, 'w') as fptr:
                 batch_size = 1024  # TODO: Is this the best value?
-                for batch_start in range(0, len(elem['values']), batch_size):
-                    batch_list = elem['values'][batch_start: batch_start + batch_size]
+                total_size = len(indices)
+                for batch_start in range(0, total_size, batch_size):
+                    #batch_list = elem['values'][batch_start: batch_start + batch_size]
+                    batch_list = indices[batch_start: batch_start + batch_size]
                     patch_list = []
                     mask_list = []
                     maskvals_list = []
-                    for idx, _ in enumerate(batch_list, start=batch_start):
-                        prog.item_progress(item, 0.9 * idx / len(elem['values']))
-                        bbox = elem['user']['bbox'][idx * 4: idx * 4 + 4]
+
+                    for idx, i in enumerate(batch_list, start=batch_start):
+                        prog.item_progress(item, 0.9 * idx / total_size)
+                        bbox = elem['user']['bbox'][i * 4: i * 4 + 4]
                         # use masked superpixel
+                        if len(bbox) < 4:
+                            pass
                         patch = ts.getRegion(
                             region=dict(
                                 left=int(bbox[0]), top=int(bbox[1]),
@@ -409,6 +423,8 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch
                                              (time.time() - starttime)),
                                   item['name'])
                     del batch_list, patch_list, mask_list, maskvals_list, feature_list
+                used_indices_ds = fptr.create_dataset(
+                    'used_indices', data=np.array(indices), dtype='i')
                 print(ds.shape, len(elem['values']), '%5.3f' % (time.time() - starttime),
                       item['name'])
             prog.item_progress(item, 0.9)
@@ -418,9 +434,9 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch
             prog.item_progress(item, 1)
             return file
 
-    def createFeatures(self, gc, folderId, annotationName, featureFolderId, patchSize, numWorkers,
-                       prog):
-        itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
+    def createFeatures(self, gc, folderId, annotationName, itemsAndAnnot, featureFolderId, patchSize, numWorkers,
+                       prog, cutoff):
+        # itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
         prog.message('Creating features')
         prog.progress(0)
         prog.items([item for item, _, _ in itemsAndAnnot])
@@ -449,7 +465,7 @@ def createFeatures(self, gc, folderId, annotationName, featureFolderId, patchSiz
                     else:
                         futures.append((item, executor.submit(
                             self.createFeaturesForItem, gc, item, elem, featureFolderId,
-                            '%s.feature.h5' % (item['name']), patchSize, prog)))
+                            '%s.feature.h5' % (item['name']), patchSize, prog, cutoff)))
         for item, future in futures:
             file = future.result()
             try:
@@ -471,8 +487,13 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature,
         gc.downloadFile(feature['_id'], featurePath)
         with h5py.File(featurePath, 'r') as ffptr:
             fds = ffptr['images']
-            for idx, labelnum in enumerate(elem['values']):
-                if labelnum and labelnum < len(elem['categories']):
+            if 'used_indices' in ffptr:
+                indices = ffptr['used_indices']
+            else:
+                indices = range(len(elem['values']))
+            for i,idx in enumerate(indices):
+                labelnum = elem['values'][idx]
+                if 0 < labelnum < len(elem['categories']):
                     labelname = elem['categories'][labelnum]['label']
                     if labelname in excludeLabelList:
                         continue
@@ -483,7 +504,7 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature,
                     labelname = labelList[labelnum - 1]
                 else:
                     continue
-                patch = fds[idx]
+                patch = fds[i]
                 if not record['ds']:
                     record['ds'] = record['fptr'].create_dataset(
                         'images', (1,) + patch.shape, maxshape=(None,) + patch.shape,
@@ -503,10 +524,9 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature,
                     print(record['ds'].shape, record['counts'],
                           '%5.3f' % (time.time() - record['starttime']))
 
-    def trainModel(self, gc, folderId, annotationName, features, modelFolderId,
+    def trainModel(self, gc, annotationName, itemsAndAnnot, features, modelFolderId,
                    batchSize, epochs, trainingSplit, randomInput, labelList,
                    excludeLabelList, prog):
-        itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
         with tempfile.TemporaryDirectory(dir=os.getcwd()) as tempdir:
             trainingPath = os.path.join(tempdir, 'training.h5')
             with h5py.File(trainingPath, 'w') as fptr:
@@ -534,7 +554,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId,
                 prog.progress(1)
                 if not record['ds']:
                     print('No labeled data')
-                    return
+                    return None, None
                 record['labelds'] = fptr.create_dataset(
                     'labels', (len(record['labelvals']),), dtype=int)
                 record['labelds'] = np.array(record['labelvals'], dtype=int)
@@ -566,7 +586,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId,
             print('Saved modTraining')
             return modelFile, modTrainingFile
 
-    def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, model, item,
+    def predictLabelsForItem(self, gc, annotationName, tempdir, model, item,
                              annotrec, elem, feature, curEpoch, userId, labels, groups,
                              makeHeatmaps, radius, magnification, certainty, batchSize, prog):
         import al_bench.factory
@@ -579,6 +599,8 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir,
 
         # Figure out which samples are already labeled
         labeled_samples: NDArray[np.int_] = np.nonzero(np.array(elem['values']))
+        number_annotations = len(elem['values'])
+        tiny = np.finfo(np.float32).tiny
 
         print(f'{labeled_samples = }')
         print(f'certainty_type = {certainty!r}')
@@ -589,9 +611,17 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir,
         # In case we are computing batchbald
         compCertainty.set_batchbald_num_samples(16)
         compCertainty.set_batchbald_batch_size(100)
-        compCertainty.set_batchbald_excluded_samples(labeled_samples)
+        #compCertainty.set_batchbald_excluded_samples(labeled_samples)
 
         with h5py.File(featurePath, 'r') as ffptr:
+            if 'used_indices' in ffptr:
+                used_indices = set(list(ffptr['used_indices']))
+            else:
+                used_indices = set(range(number_annotations))
+            all_indices = set(range(number_annotations))
+            unused_indices = list(sorted(all_indices.difference(used_indices)))
+            compCertainty.set_batchbald_excluded_samples(np.array(unused_indices))
+
             prog.item_progress(item, 0)
             # Create predicted annotation
             annot = copy.deepcopy(annotrec)
@@ -600,21 +630,29 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir,
             annot['elements'][0]['categories'] = [groups[key] for key in labels]
             ds = ffptr['images']
             prog.item_progress(item, 0.05)
-            catWeights, predictions = self.predictLabelsForItemDetails(
-                batchSize, ds, item, model, prog)
-            catWeights = np.array(catWeights)
-            predictions = np.array(predictions)
+            _catWeights, _predictions, indices = self.predictLabelsForItemDetails(
+                batchSize, ds, np.array(list(used_indices), dtype=np.int64), item, model, use_cuda, prog)
+            # expand catWeights and predictions to be length of elem['values'] instead of just `cutoff` samples
+            # then copy in results from predictions
+            catWeights = np.zeros((number_annotations,) + _catWeights.shape[1:], dtype=np.float32 if str(_catWeights.dtype).endswith("32") else np.float64)
+            predictions = np.zeros((number_annotations,) + _predictions.shape[1:], dtype=np.float32 if str(_predictions.dtype).endswith("32") else np.float64)
+            for cw,p,idx in zip(_catWeights, _predictions, indices):
+                catWeights[idx] = cw
+                predictions[idx] = p
+                
             print_fully('predictions', predictions)
             prog.item_progress(item, 0.7)
             # compCertainty needs catWeights to have shape (num_superpixels,
             # bayesian_samples, num_classes) if 'batchbald' is selected, otherwise the
             # shape should be (num_superpixels, num_classes).
-            print_fully('catWeights', catWeights)
             # Ask compCertainty to compute certainties
-            cert = compCertainty.from_numpy_array(catWeights)
+            cert = compCertainty.from_numpy_array(catWeights + tiny)
+            print_fully('catWeights', catWeights)
+
             # After the call to compCertainty, those numbers that end up as values for
             # annot's keys 'values', 'confidence', 'categoryConfidence', and 'certainty'
             # should have shape (num_superpixels, num_classes).
+
             print_fully('cert', cert)
             scores = cert[certainty]['scores']
             print_fully('scores', scores)
@@ -625,14 +663,26 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir,
                 epsilon = 1e-50
                 predictions = np.log(catWeights + epsilon)
             cats = np.argmax(catWeights, axis=-1)
-            indices = np.arange(cats.shape[0])
-            conf = catWeights[indices, cats[indices]]
+            conf = catWeights[list(all_indices), cats[np.arange(cats.shape[0])]]
             print_fully('cats', cats)
             print_fully('conf', conf)
 
+            # give unused_indices the highest possible confidence so that they show up last in the active learning UI
+            # (because it sorts by confidence in descending order)
+            scores[unused_indices] = np.finfo(scores.dtype).max
+            # additionally, ensure that labels that are already labeled also end up last or late in the recommendations
+            # for the DSA UI, this prevents labeled samples from being shown again to the user
+            scores[labeled_samples] = np.finfo(scores.dtype).max
+
+            # additionally, ensure that labels that are already labeled also end up last or late in the recommendations
+            # for the DSA UI, this prevents labeled samples from being shown again to the user
+            scores[labeled_samples] = np.finfo(scores.dtype).max
+
             cats = cats.tolist()
             conf = conf.tolist()
-            # Should this be from predictions for from catWeights?!!!
+
+            # Should this be from predictions or from catWeights?!!!
+            predictions[np.isneginf(predictions)] = np.finfo(predictions.dtype).min
             catConf = predictions.tolist()
             scores = scores.tolist()
             annot['elements'][0]['values'] = cats
@@ -769,10 +819,10 @@ def makeHeatmapsForItem(self, gc, annotationName, userId, tempdir, radius, item,
                                           'fileId': item['largeImage']['fileId'],
                                           'userId': userId}))
 
-    def predictLabels(self, gc, folderId, annotationName, features, modelFolderId,
+    def predictLabels(self, gc, folderId, annotationName, itemsAndAnnot, features, modelFolderId,
                       annotationFolderId, saliencyMaps, radius, magnification,
                       certainty, batchSize, prog):
-        itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
+        #itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
         curEpoch = self.getCurrentEpoch(itemsAndAnnot)
         folder = gc.getFolder(folderId)
         userId = folder['creatorId']
@@ -845,6 +895,10 @@ def main(self, args):
 
         gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
         gc.token = args.girderToken
+        gc.authenticate('admin', 'password')
+        # dummy check to make sure we have access to server
+        if not [x for x in list(gc.listCollection()) if x['name'] == 'Active Learning']:
+            raise Exception("Unable to authenticate with girder")
 
         with ProgressHelper(
                 'Superpixel Classification', 'Superpixel classification', args.progress) as prog:
@@ -853,16 +907,19 @@ def main(self, args):
                     gc, args.images, args.annotationName, args.radius, args.magnification,
                     args.annotationDir, args.numWorkers, prog)
 
+            itemsAndAnnot = self.getItemsAndAnnotations(gc, args.images, args.annotationName)
             features = self.createFeatures(
-                gc, args.images, args.annotationName, args.features, args.patchSize,
-                args.numWorkers, prog)
+                gc, args.images, args.annotationName, itemsAndAnnot, args.features, args.patchSize,
+                args.numWorkers, prog, args.cutoff)
 
             if args.train:
+                print("Training...")
                 self.trainModel(
-                    gc, args.images, args.annotationName, features, args.modeldir, args.batchSize,
+                    gc, args.images, args.annotationName, itemsAndAnnot, features, args.modeldir, args.batchSize,
                     args.epochs, args.split, args.randominput, args.labels, args.exclude, prog)
 
+            print("Predicting labels...")
             self.predictLabels(
-                gc, args.images, args.annotationName, features, args.modeldir, args.annotationDir,
+                gc, args.images, args.annotationName, itemsAndAnnot, features, args.modeldir, args.annotationDir,
                 args.heatmaps, args.radius, args.magnification, args.certainty, args.batchSize,
                 prog)
diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py
index e06d247..243c520 100644
--- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py
+++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py
@@ -532,12 +532,13 @@ def predictLabelsForItemDetails(
         for cb in callbacks:
             cb.on_predict_begin(logs=logs)
 
+        # ds also needs to have information about the indices so that we can shuffle the data but still link it to an index
         ds: torch.utils.data.TensorDataset = torch.utils.data.TensorDataset(
             (
                 torch.from_numpy(np.array(ds_h5).transpose((0, 3, 2, 1)))
                 if self.feature_is_image
                 else torch.from_numpy(np.array(ds_h5))
-            ),
+            ), torch.from_numpy(indices),
         )
         if batchSize < 1:
             batchSize = self.findOptimalBatchSize(model, ds, training=False)
@@ -545,6 +546,7 @@ def predictLabelsForItemDetails(
         dl: torch.utils.data.DataLoader = torch.utils.data.DataLoader(ds, batch_size=batchSize)
         predictions: NDArray[np.float_] = np.zeros((num_superpixels, bayesian_samples, num_classes))
         catWeights: NDArray[np.float_] = np.zeros((num_superpixels, bayesian_samples, num_classes))
+        outIndices: NDArray[np.int64] = np.zeros(num_superpixels, dtype=np.int64)
         with torch.no_grad():
             model.eval()  # Tell torch that we will be doing predictions
             row: int = 0
@@ -567,6 +569,8 @@ def predictLabelsForItemDetails(
                 catWeights_raw = torch.nn.functional.softmax(predictions_raw, dim=-1)
                 predictions[row:new_row, :, :] = predictions_raw.detach().cpu().numpy()
                 catWeights[row:new_row, :, :] = catWeights_raw.detach().cpu().numpy()
+                outIndices[row:new_row] = data[1].detach().cpu().numpy().astype(np.int64)[:]
+
                 row = new_row
                 for cb in callbacks:
                     cb.on_predict_batch_end(i)
@@ -574,7 +578,7 @@ def predictLabelsForItemDetails(
             cb.on_predict_end({'outputs': predictions})
         prog.item_progress(item, 0.4)
         # scale to units
-        return catWeights, predictions
+        return catWeights, predictions, outIndices
 
     def findOptimalBatchSize(
         self, model: torch.nn.Module, ds: torch.utils.data.TensorDataset, training: bool,

From 207067ad1e6fac3075f68a4110cb4775039dfdd2 Mon Sep 17 00:00:00 2001
From: Anders Sildnes <andsild@posteo.net>
Date: Wed, 21 May 2025 18:14:34 -0500
Subject: [PATCH 2/5] Bugfix: use global index, not batch index, for bounding
 boxes

This may not have been a bug before, but now that indices may not be
in order (since we are using `cutoff`), it becomes relevant.
---
 .../SuperpixelClassification/SuperpixelClassificationBase.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
index d7aa4d7..1d86403 100644
--- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
+++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
@@ -398,7 +398,7 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch
                         if mask.shape[2] == 4:
                             mask = mask[:, :, :-1]
                         maskvals = [[val % 256, val // 256 % 256, val // 65536 % 256]
-                                    for val in [idx * 2, idx * 2 + 1]]
+                                    for val in [(i + 1) * 2, (i + 1) * 2 + 1]]
                         patch_list.append(patch)
                         mask_list.append(mask)
                         maskvals_list.append(maskvals)

From 68293c3d36640abdc5f7c7b3207ee6dcacf4b228 Mon Sep 17 00:00:00 2001
From: Anders Sildnes <andsild@posteo.net>
Date: Mon, 12 May 2025 13:25:13 -0500
Subject: [PATCH 3/5] Add simple tests for features, training, pred

---
 .../tests/generate_MNIST_image.py             | 159 +++++
 .../tests/test_feature_extract.py             | 218 +++++++
 .../tests/test_full_training_cycle.py         | 524 ++++++++++++++++
 .../tests/test_gen_superpixels.py             | 164 +++++
 .../tests/test_predict.py                     | 254 ++++++++
 .../tests/test_tensorflow.py                  |  93 +++
 .../tests/test_torch.py                       |  94 +++
 .../tests/validate_json_annotation.py         | 588 ++++++++++++++++++
 8 files changed, 2094 insertions(+)
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_predict.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_torch.py
 create mode 100644 superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py

diff --git a/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py b/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py
new file mode 100644
index 0000000..9d7e121
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python
+'''
+Generate a .tiff with numbers from MNIST
+'''
+
+import os
+import argparse
+import random
+
+import numpy as np
+import pandas as pd
+import tifffile
+from PIL import Image
+from torchvision.datasets import MNIST
+
+def parse_args():
+    """Parse the command-line options of the MNIST test-image generator from sys.argv."""
+    parser = argparse.ArgumentParser(description="Generate a pyramidal MNIST image.")
+    parser.add_argument('--root_dataset_path', type=str, default="/data/aza4423_anders/mnist", help='Path to download and store MNIST dataset')
+    # num_images must be a perfect square (e.g. 244 * 244) so the tiles fill a square grid
+    parser.add_argument('--num_images', type=int, default=4, help='Number of random MNIST images to use')
+    parser.add_argument('--output_path', type=str, default="/data/aza4423_anders/aml-dsa/mnist_pyramid.tif", help='Output path for the pyramidal TIF file')
+    # BooleanOptionalAction already supplies --test / --no-test; its `type` and
+    # `metavar` arguments were deprecated in Python 3.12 and removed in 3.14.
+    parser.add_argument('--test', default=False, action=argparse.BooleanOptionalAction,
+                        help='whether to use test MNIST or train'
+                        )
+
+    args = parser.parse_args()
+    return args
+
+def d_to_rgb(d):
+    # Unpack the low 24 bits of d into [r, g, b], least-significant byte first:
+    # r is bits 0-7, g is bits 8-15 and b is bits 16-23.
+    low, mid, high = ((d >> shift) & 0xFF for shift in (0, 8, 16))
+    return [low, mid, high]
+
+
+def create_mnist_image(root_dataset_path=".", num_images=100, output_path="./out", test=False, start_value=0):
+    """Write a square MNIST mosaic TIFF, a matching pixelmap TIFF and a cells CSV.
+
+    The mosaic is written to `output_path`, the pixelmap to
+    `output_path + ".pixelmap.tiff"` and the CSV to `output_path + "_cells.csv"`.
+    In the pixelmap, tile number idx (row-major, from 0) has its interior set to
+    the color encoding (idx + 1) * 2 + start_value and its one-pixel border to
+    that value plus one. Each CSV row holds x, y, w, h and the MNIST label.
+    When both sides are at least 64 px, each file also gets half-resolution
+    levels stored as sub-IFDs of the base page.
+
+    Returns (output_path, pixelmap path, pandas DataFrame read from the CSV).
+    Raises AssertionError when num_images is not a perfect square.
+    """
+    # verify that num_images has a square root; otherwise we'd have to insert blank tiles for the uneven grid
+    assert num_images % np.sqrt(num_images) == 0
+
+    # Download MNIST (if not already downloaded)
+    dataset = MNIST(root=root_dataset_path, train=not test, download=True)
+
+    # Take the first num_images entries in shuffled order, or
+    # oversample (with replacement) if we want more images than the length of MNIST
+    if num_images > len(dataset):
+        indices = random.choices(range(len(dataset)), k=num_images)
+    else:
+        indices = list(range(num_images))
+        random.shuffle(indices)
+
+    mnist_images = [np.array(dataset[i][0]) for i in indices]  # each is 28x28, uint8
+    mnist_labels = [np.array(dataset[i][1]) for i in indices]
+
+    # Arrange the images in a grid (so num_images should be a number with an integer root)
+    tile_rows, tile_cols = int(np.sqrt(num_images)), int(np.sqrt(num_images))
+    tile_h, tile_w = mnist_images[0].shape  # typically 28x28
+    grid_h, grid_w = tile_rows * tile_h, tile_cols * tile_w
+    base_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8)
+    pm_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8)
+
+    for idx, img in enumerate(mnist_images):
+        r = idx // tile_cols
+        c = idx % tile_cols
+        # convert img to RGB
+        rgb_img = np.stack([img, img, img], axis=-1)
+        base_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = rgb_img
+
+        # pixelmap tile: the interior gets the even value, the one-pixel border the odd one
+        value_img = np.zeros((tile_h, tile_w, 3), dtype=np.uint8)
+        i = (idx + 1) * 2
+        rgb = d_to_rgb(i + start_value)
+        value_img[1:-1, 1:-1] = rgb
+        rgb = d_to_rgb(i + start_value + 1)
+        value_img[0, :] = rgb
+        value_img[-1, :] = rgb
+        value_img[:, 0] = rgb
+        value_img[:, -1] = rgb
+
+        pm_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = value_img
+
+    # Note: We assume that the base level corresponds to 40x magnification.
+    # Now, build both pyramids (lists of downsampled images) in lockstep,
+    # halving the size until one dimension becomes very small.
+    pyramid = [base_image]
+    pyramid_pm = [pm_image]
+
+    # Only compare height and width: shape also contains the channel count (3),
+    # which kept the former `min(current.shape) >= 64` loop from ever running.
+    while min(pyramid[-1].shape[:2]) >= 64:
+        new_h, new_w = pyramid[-1].shape[0] // 2, pyramid[-1].shape[1] // 2
+        pyramid.append(np.array(Image.fromarray(pyramid[-1]).resize((new_w, new_h))))
+        # Shrink the previous pixelmap level (not the digit image) with nearest-
+        # neighbor sampling so encoded values are never blended into new ones.
+        pm_resized = Image.fromarray(pyramid_pm[-1]).resize((new_w, new_h), resample=Image.NEAREST)
+        pyramid_pm.append(np.array(pm_resized))
+
+    # Save each image as a pyramidal TIFF: the base image is the main page and the
+    # remaining levels are written as its sub-IFDs.
+    output_filename = output_path  # Use the output path from argument
+    output_filename_pm = output_filename + ".pixelmap.tiff"
+    outputs = [
+        (output_filename, pyramid, 'Whole-slide MNIST image at 40x magnification'),
+        (output_filename_pm, pyramid_pm,
+         'Pixelmap for Whole-slide MNIST image at 40x magnification'),
+    ]
+    for filename, levels, description in outputs:
+        if os.path.dirname(filename):
+            os.makedirs(os.path.dirname(filename), exist_ok=True)
+        if os.path.exists(filename):
+            os.remove(filename)
+        with tifffile.TiffWriter(filename, bigtiff=False) as tif:
+            # `subifds` is the NUMBER of sub-IFDs that follow; the levels
+            # themselves are supplied by the subsequent write() calls.
+            tif.write(levels[0],
+                      tile=(tile_w * 4, tile_h * 4),
+                      photometric='RGB',
+                      description=description,
+                      subifds=len(levels) - 1)
+            for level in levels[1:]:
+                tif.write(level,
+                          tile=(tile_w * 4, tile_h * 4),
+                          photometric='RGB',
+                          subfiletype=1)  # 1 = reduced-resolution image
+        print(f"Pyramidal TIFF saved as {filename}")
+
+    # generate a corresponding CSV "cells" file
+    # with headers "x,y,w,h,value" for each image
+    csv_filename = output_filename + "_cells.csv"
+    with open(csv_filename, 'w') as f:
+        f.write("x,y,w,h,value\n")
+        i = 0
+        for r in range(tile_rows):
+            for c in range(tile_cols):
+                x, y = c * tile_w, r * tile_h
+                f.write(f"{x},{y},{tile_w},{tile_h},{mnist_labels[i]}\n")
+                i += 1
+    df = pd.read_csv(csv_filename, header=0)
+    print(f"Annotation file saved as {csv_filename}")
+    return output_filename, output_filename_pm, df
+
+if __name__ == "__main__":
+    _args = parse_args()
+    create_mnist_image(root_dataset_path=_args.root_dataset_path, num_images=_args.num_images, output_path=_args.output_path, test=_args.test)
diff --git a/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py b/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py
new file mode 100644
index 0000000..2c17864
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py
@@ -0,0 +1,218 @@
+import os
+import shutil
+import sys
+import tempfile
+from unittest.mock import MagicMock
+
+import h5py
+import large_image
+import numpy as np
+import pytest
+
+# make pythonpath work out of the box - although your editor may complain
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from progress_helper import ProgressHelper
+from tests.generate_MNIST_image import create_mnist_image
+
+from xdg_base_dirs import ( xdg_cache_home, )
+
+NUM_IMAGES = 64
+
+@pytest.fixture(scope="session")
+def create_sample_data():  # session fixture: builds one MNIST test slide and yields its paths and labels
+    global NUM_IMAGES  # only read here, never reassigned
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        tiff_path = os.path.join(tmpdirname, "test_mnist.tiff")
+        # the pixelmap path is not built here; create_mnist_image returns it alongside the slide path
+
+        tiff_path, tiff_path_pm, labels = create_mnist_image(
+            root_dataset_path=xdg_cache_home(),
+            num_images=NUM_IMAGES,
+            output_path=tiff_path,
+            test=False,
+        )
+        # shift every label up by one, because 0 is background
+        labels['value'] = labels['value'] + 1
+
+        # yield (rather than return) so the temporary directory stays open while the tests run
+        yield tiff_path, tiff_path_pm, NUM_IMAGES, labels
+
+MNIST_IMAGE_SIZE=28
+COLOR_DIM = 3
+
+def test_cutoff(create_sample_data):  # checks how many patches createFeaturesForItem stores when cutoff=1
+    global MNIST_IMAGE_SIZE, COLOR_DIM  # only read here, never reassigned
+    test_image_pth, test_image_pth_pm, num_images, labels = create_sample_data
+    base = SuperpixelClassificationBase()
+
+    # Create test data
+    item = {
+        'name': test_image_pth,
+        'largeImage': {'fileId': 'test_image_id'}
+    }
+
+    # Mock girder client
+    gc = MagicMock()
+    def mv_to_dst(_, dst):  # fake download: copy the generated slide or pixelmap to dst unless dst already exists
+        if "pixelmap" in dst:
+            if not os.path.exists(dst):
+                return shutil.copy(test_image_pth_pm, dst)
+        else:
+            if not os.path.exists(dst):
+                return shutil.copy(test_image_pth, dst)
+        return None
+    gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+    gc.getItem = MagicMock(return_value={'name': test_image_pth_pm, 'largeImage': {'fileId': 'foobar'}})
+    def mv_to_src(_, src):  # fake upload: copy the uploaded file next to the generated slide
+        dst = os.path.dirname(test_image_pth)
+        return shutil.copy(src, dst)
+    gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'})
+    # NOTE: side_effect is a function, so the mock returns mv_to_src's result (the copied path), not return_value
+
+    bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()]  # x,y,w,h -> x1,y1,x2,y2
+
+    elem = {
+        'girderId': 'test_girder_id',
+        'values':
+            [] \
+            + list(labels['value'])[:-2]
+            + [0, 0],  # last two images unlabeled
+        'user': {
+            'bbox':  [item for sublist in bboxes for item in sublist]  # flattened: x1,y1,x2,y2,x1,y1,...
+        },
+        'transform': {'matrix': [[1.0]]}
+    }
+
+    filename = 'test_features.h5'
+    h5_file = os.path.join(os.path.dirname(test_image_pth), filename)  # where mv_to_src drops the "uploaded" file
+    if os.path.exists(h5_file):
+        os.remove(h5_file)
+
+    assert not os.path.exists(h5_file)
+
+    cutoff = 1
+    with ProgressHelper( 'Superpixel Classification',
+                         'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items([item])
+        result = base.createFeaturesForItem(
+            gc=gc,
+            item=item,
+            elem=elem,
+            featureFolderId='test_folder_id',
+            fileName=filename,
+            patchSize=MNIST_IMAGE_SIZE,
+            prog=prog,
+            cutoff=cutoff,
+        )
+
+    assert os.path.exists(h5_file), f"Output file {h5_file} does not exist"
+    with h5py.File(h5_file, 'r') as ffptr:
+        assert 'images' in ffptr
+        assert ffptr['images'].shape == (NUM_IMAGES - cutoff, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM)
+        assert len(ffptr['used_indices']) == NUM_IMAGES - cutoff # 63 here; presumably the 62 labeled patches plus `cutoff` unlabeled ones - TODO confirm
+
+def test_create_features_for_item(create_sample_data):  # with a large cutoff every patch must be stored and be pixel-equal to the slide
+    global MNIST_IMAGE_SIZE, COLOR_DIM  # only read here, never reassigned
+    test_image_pth, test_image_pth_pm, num_images, labels = create_sample_data
+    base = SuperpixelClassificationBase()
+
+    # Create test data
+    item = {
+        'name': test_image_pth,
+        'largeImage': {'fileId': 'test_image_id'}
+    }
+
+    # Mock girder client
+    gc = MagicMock()
+    def mv_to_dst(_, dst):  # fake download: copy the generated slide or pixelmap to dst unless dst already exists
+        if "pixelmap" in dst:
+            if not os.path.exists(dst):
+                return shutil.copy(test_image_pth_pm, dst)
+        else:
+            if not os.path.exists(dst):
+                return shutil.copy(test_image_pth, dst)
+        return None
+    gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+    gc.getItem = MagicMock(return_value={'name': test_image_pth_pm, 'largeImage': {'fileId': 'foobar'}})
+    def mv_to_src(_, src):  # fake upload: copy the uploaded file next to the generated slide
+        dst = os.path.dirname(test_image_pth)
+        return shutil.copy(src, dst)
+    gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'})
+    # NOTE: side_effect is a function, so the mock returns mv_to_src's result (the copied path), not return_value
+
+    bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()]  # x,y,w,h -> x1,y1,x2,y2
+
+    elem = {
+        'girderId': 'test_girder_id',
+        'values':
+            [] \
+            + list(labels['value'])[:-2]
+            + [0, 0],  # last two images unlabeled
+        'user': {
+            'bbox':  [item for sublist in bboxes for item in sublist]  # flattened: x1,y1,x2,y2,x1,y1,...
+        },
+        'transform': {'matrix': [[1.0]]}
+    }
+
+    filename = 'test_features.h5'
+    h5_file = os.path.join(os.path.dirname(test_image_pth), filename)  # where mv_to_src drops the "uploaded" file
+    if os.path.exists(h5_file):
+        os.remove(h5_file)
+
+    assert not os.path.exists(h5_file)
+
+    with ProgressHelper( 'Superpixel Classification',
+                         'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items([item])
+        result = base.createFeaturesForItem(
+            gc=gc,
+            item=item,
+            elem=elem,
+            featureFolderId='test_folder_id',
+            fileName=filename,
+            patchSize=MNIST_IMAGE_SIZE,
+            prog=prog,
+            cutoff=9999  # far more than the two unlabeled patches in elem['values']
+        )
+
+    assert os.path.exists(h5_file), f"Output file {h5_file} does not exist"
+    with h5py.File(h5_file, 'r') as ffptr:
+        assert 'images' in ffptr
+        assert ffptr['images'].shape == (num_images, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM)
+        feature_img = ffptr['images'][0]
+        # read the region of the first bbox (x, y, x2, y2 = elem['user']['bbox'][:4]) from test_image_pth and check it is pixel-equal to feature_img
+        x, y, x2, y2 = elem['user']['bbox'][:4]
+        ts = large_image.getTileSource(test_image_pth)
+        orig_image = ts.getRegion(
+            region=dict(left=x, top=y, right=x2, bottom=y2),
+            format=large_image.tilesource.TILE_FORMAT_NUMPY
+        )[0]
+        orig_image = orig_image.astype(feature_img.dtype)  # compare in the dtype stored in the feature file
+        print(orig_image.dtype)
+        np.testing.assert_array_equal(orig_image, feature_img)
+
+        # also check that the last image matches
+        feature_img = ffptr['images'][-1]
+        x, y, x2, y2 = elem['user']['bbox'][-4:]
+        ts = large_image.getTileSource(test_image_pth)
+        orig_image = ts.getRegion(
+            region=dict(left=x, top=y, right=x2, bottom=y2),
+            format=large_image.tilesource.TILE_FORMAT_NUMPY
+        )[0]
+        orig_image = orig_image.astype(feature_img.dtype)
+        print(orig_image.dtype)
+        np.testing.assert_array_equal(orig_image, feature_img)
+
+        assert 'used_indices' in ffptr
+        assert len(ffptr['used_indices']) == num_images
+
+    # Assertions
+    assert result == h5_file
+    assert gc.downloadFile.call_count == 2  # Called for both image and mask
+    assert gc.getItem.call_count == 1
+    assert gc.uploadFileToFolder.call_count == 1
diff --git a/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py b/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py
new file mode 100644
index 0000000..03c6b8a
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py
@@ -0,0 +1,524 @@
+'''
+This file contains tests for a full training cycle: extracting superpixels, training and evaluation.
+The "cycle" is:
+    1. generate NUM_WSIS different whole slide images using numbers from MNIST.
+    2. extract features from said images.
+    3. train a model on the features.
+    4. evaluate the model on the features.
+We expect an accuracy above 75% (see expected_min_accuracy in the tests below).
+
+This test is to verify that the training cycle works as expected.
+Since there is batching involved, we want to use a larger number of samples instead of just a quick mini-test, as found in the other files.
+'''
+import argparse
+import glob
+import json
+import os
+import re
+import shutil
+import sys
+import tempfile
+from unittest.mock import MagicMock
+
+import numpy as np
+import pytest
+from xdg_base_dirs import (xdg_cache_home, )
+
+# make pythonpath work out of the box - although your editor may complain
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from SuperpixelClassificationTensorflow import SuperpixelClassificationTensorflow
+from SuperpixelClassificationTorch import SuperpixelClassificationTorch
+from tests.generate_MNIST_image import create_mnist_image
+
+NUM_WSIS = 2
+MNIST_IMAGE_SIZE = 28
+NUM_IMAGES_PER_WSI = 10 ** 2
+COLOR_DIM = 3
+PATCH_SIZE = 100 # only size compatible with pytorch model for the time being (since there are hardcoded sizes in the definition of the model)
+NUM_EPOCHS = 5
+
+@pytest.fixture(scope="function")
+def create_sample_data(request):  # per-test fixture: builds NUM_WSIS MNIST slides; request.param is forwarded as start_value
+    global NUM_WSIS, NUM_IMAGES_PER_WSI  # only read here, never reassigned
+    wsi_paths, pm_paths, list_labels = [], [], []
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        for i in range(NUM_WSIS):
+            tiff_path    = os.path.join(tmpdirname, f"test_mnist_{i}.tiff")
+
+            tiff_path, tiff_path_pm, labels = create_mnist_image(
+                root_dataset_path=xdg_cache_home(),
+                num_images=NUM_IMAGES_PER_WSI,
+                output_path=tiff_path,
+                test=False,
+                start_value = request.param  # value supplied via @pytest.mark.parametrize(..., indirect=True)
+            )
+            # relabel digit 0 as 10, because the value 0 is reserved for unlabeled superpixels
+            labels.loc[labels['value'] == 0, 'value'] = 10
+
+            wsi_paths.append(tiff_path)
+            pm_paths.append(tiff_path_pm)
+            list_labels.append(labels)
+
+        # yield (rather than return) so the temporary directory stays open while the tests run
+        yield wsi_paths, pm_paths, NUM_WSIS, list_labels
+
+@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~5-10 min), run only if you want to")
+@pytest.mark.parametrize('create_sample_data', [0], indirect=True)
+def test_main_pytorch(create_sample_data):  # runs base.main() with the Torch backend against a mocked girder client
+    global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS  # only read here, never reassigned
+    tiff_paths, tiff_path_pms, num_images, labels = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTorch()
+
+    annotation_name = 'torchMNISTtest'
+    config = dict(
+        annotationDir = 'annotationdir',
+        annotationName = annotation_name,
+        batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time
+        certainty = 'batchbald',
+        cutoff = 600000, # plenty of space to allow all training samples
+        epochs = NUM_EPOCHS,
+        exclude = [],
+        feature = 'patch',
+        features = 'featuredir',
+        gensuperpixels = False,
+        girderApiUrl = 'http://localhost:8080/api/v1',
+        girderToken = '<PASSWORD>',
+        heatmaps = False,
+        images = 'imagedir',
+        labels = '',
+        magnification = 40.0,
+        modeldir = '',
+        numWorkers = 1,
+        patchSize = PATCH_SIZE,
+        radius    = MNIST_IMAGE_SIZE,
+        randominput = False,
+        split = 0.7,
+        train = True,
+        useCuda = True,
+        progress = True,
+    )
+    args = argparse.Namespace(**config)  # stands in for the parsed command-line arguments
+
+    mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0']  # list index == label value; the fixture stores digit 0 as value 10
+
+    items = []
+    for i in range(NUM_WSIS):
+        bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()]  # x,y,w,h -> x1,y1,x2,y2
+        elem = {
+            'girderId': f'test_girder_id{i}',
+            'categories': [
+                {"label": c} for c in mnist_labels
+                ],
+            'values': labels[i]['value'].tolist(),
+            'user': {
+                'bbox':  [item for sublist in bboxes for item in sublist]  # flattened: x1,y1,x2,y2,x1,y1,...
+            },
+            'transform': {'matrix': [[1.0]]}
+        }
+        item = {
+            '_id': f'test_file{i}',
+            'name': os.path.basename(tiff_paths[i]),
+            'largeImage': {'fileId': f'test_image_id{i}'},
+        }
+        mask_item = {
+            '_id': f'test_file{i}',
+            'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]),  # basename with its last ".tiff" removed
+            'largeImage': {'fileId': f'test_mask_id{i}'},
+        }
+        annotrec = {
+            '_id': f'test_file{i}',
+            '_version': 0,
+            'annotation': {'name': 'TorchTest'},
+        }
+        items.append((item, annotrec, elem))
+
+
+    gc = MagicMock()
+    base.getItemsAndAnnotations = MagicMock(return_value=items)  # bypass the girder lookup and feed the synthetic items directly
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        def mv_to_dst(req_pth : str, dst : str):  # fake download: picks the file to copy from the requested id/path
+            if req_pth.startswith("test_"):
+                for f in tiff_paths + tiff_path_pms:
+                    dpath = os.path.join(dst, os.path.basename(f))
+                    if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst):  # only the file whose basename matches dst
+                        shutil.copy(f, dst)
+                        print(f"Copied {f} to {dst}")
+            elif req_pth.startswith("feature"):
+                feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5"))
+                for f in feature_files:
+                    dpath = os.path.join(dst, os.path.basename(f))
+                    if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst):
+                        shutil.copy(f, dst)
+                        print(f"Copied {f} to {dst}")
+            elif req_pth.endswith("model"):
+                model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.pth"))[0]  # {0} is the literal 0, i.e. the pattern "*Model *0.pth"
+                shutil.copy(model_file, dst)
+            elif "modtraining" in req_pth:
+                model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0]  # pattern "*ModTraining *0.h5"
+                shutil.copy(model_file, dst)
+            else:
+                print(f"Received unknown request path '{req_pth}'")
+            return {}
+
+        gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+        def mv_to_src(req, src, reference=None):  # fake upload: keep a copy of every uploaded file in tmpdirname
+            shutil.copy(src, tmpdirname)
+            print(f"Copied {src} to {tmpdirname}")
+            # each WSI gets two separate .anot files. The below if statement gives them unique filenames so we can reference later
+            if src.endswith(".anot"):
+                # extract the number at the end of req, which can look like "testfile1" or "testfile1000"
+                m = re.search(r'(\d+)$', req)
+                num = int(m.group(1))
+                s = os.path.basename(src).replace(".anot", f"_{num}.myanot")
+                shutil.copy(src, os.path.join(tmpdirname, s))
+                print(f"Also copied {s} to {tmpdirname}")
+            return {'_id': 'feature', 'name': os.path.basename(src)}
+        gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True)  # return_value is ignored: the side_effect function's dict is returned
+
+        gc.getItem = MagicMock(return_value=mask_item)  # NOTE(review): mask_item is the one left over from the last loop iteration - confirm intended
+
+        gc.listResource = MagicMock(return_value=[dict(name=f"{annotation_name}model", _id = 'model'), dict(name=f"{annotation_name}modtraining", _id = 'modtraining')])
+        gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True)  # return_value is ignored here as well
+        gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id'))
+
+        def list_file(req: str, limit: int = 0) -> iter:  # fake listFile: one entry whose _id is 'modtraining' or 'model'
+            if "modtraining" in req:
+                return iter([dict(name=req, _id = 'modtraining')])
+            else:
+                return iter([dict(name=req, _id='model')])
+        gc.listFile = MagicMock(side_effect=list_file)
+
+        base.main(args, gc)
+
+        for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))):  # the renamed copies written by mv_to_src
+            assert os.path.exists(file)
+            with open(file, 'r') as f:
+                pred_json = json.load(f)
+                e = pred_json['elements'][0]
+                assert len(e['values']) == NUM_IMAGES_PER_WSI
+
+                assert len(e['user']['bbox']) == NUM_IMAGES_PER_WSI * 4 # 4 is for x,y,w,h
+
+                assert len(e['categories']) == len(mnist_labels) - 1 # -1 because we don't have a default category
+                assert len(e['user']['confidence']) == NUM_IMAGES_PER_WSI
+
+                # compare e['values'] to labels['values'], to make sure we've trained a valid model
+                # the order of the values is shuffled in the annotation file, the ordering is in e['categories']
+                file_num = int(file.split('Predictions_')[-1].split('.myanot')[0])  # the WSI index that mv_to_src appended to the file name
+                predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']])  # category index -> label string
+                matches = (predicted_labels == np.array(list(map(str, labels[file_num]['value']))))
+                similarity = matches.sum() / len(matches)  # fraction of superpixels predicted correctly
+                expected_min_accuracy = 0.75
+                assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}"
+                print(f"Similarity between predicted values and GT is {similarity}")
+
+@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~1-10 min), run only if you want to")
+@pytest.mark.parametrize('create_sample_data', [0], indirect=True)
+def test_main_tf(create_sample_data):  # runs base.main() with the Tensorflow backend against a mocked girder client
+    global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS  # only read here, never reassigned
+    tiff_paths, tiff_path_pms, num_images, labels = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow()
+
+    annotation_name = 'tensorflowMNISTtest'
+    config = dict(
+        annotationDir = 'annotationdir',
+        annotationName = annotation_name,
+        batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time
+        certainty = 'confidence',
+        cutoff = 600000, # plenty of space to allow all training samples
+        epochs = NUM_EPOCHS,
+        exclude = [],
+        feature = 'patch',
+        features = 'featuredir',
+        gensuperpixels = False,
+        girderApiUrl = 'http://localhost:8080/api/v1',
+        girderToken = '<PASSWORD>',
+        heatmaps = False,
+        images = 'imagedir',
+        labels = '',
+        magnification = 40.0,
+        modeldir = 'modeldir',
+        numWorkers = 1,
+        patchSize = PATCH_SIZE,
+        radius    = MNIST_IMAGE_SIZE,
+        randominput = False,
+        split = 0.7,
+        train = True,
+        useCuda = False,
+        progress = True,
+    )
+    args = argparse.Namespace(**config)  # stands in for the parsed command-line arguments
+
+    mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0']  # list index == label value; the fixture stores digit 0 as value 10
+
+    items = []
+    for i in range(NUM_WSIS):
+        bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()]  # x,y,w,h -> x1,y1,x2,y2
+        elem = {
+            'girderId': f'test_girder_id{i}',
+            'categories': [
+                {"label": c} for c in mnist_labels
+            ],
+            'values': labels[i]['value'].tolist(),
+            'user': {
+                'bbox':  [item for sublist in bboxes for item in sublist]  # flattened: x1,y1,x2,y2,x1,y1,...
+            },
+            'transform': {'matrix': [[1.0]]}
+        }
+        item = {
+            '_id': f'test_file{i}',
+            'name': os.path.basename(tiff_paths[i]),
+            'largeImage': {'fileId': f'test_image_id{i}'},
+        }
+        mask_item = {
+            '_id': f'test_file{i}',
+            'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]),  # basename with its last ".tiff" removed
+            'largeImage': {'fileId': f'test_mask_id{i}'},
+        }
+        annotrec = {
+            '_id': f'test_file{i}',
+            '_version': 0,
+            'annotation': {'name': 'TorchTest'},  # NOTE(review): same name as in the Torch test although this is the Tensorflow test
+        }
+        items.append((item, annotrec, elem))
+
+
+    gc = MagicMock()
+    base.getItemsAndAnnotations = MagicMock(return_value=items)  # bypass the girder lookup and feed the synthetic items directly
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        def mv_to_dst(req_pth : str, dst : str):  # fake download: picks the file to copy from the requested id/path
+            if req_pth.startswith("test_"):
+                for f in tiff_paths + tiff_path_pms:
+                    dpath = os.path.join(dst, os.path.basename(f))
+                    if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst):  # only the file whose basename matches dst
+                        shutil.copy(f, dst)
+                        print(f"MockDownload: Copied {f} to {dst}")
+            elif req_pth.startswith("feature"):
+                feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5"))
+                for f in feature_files:
+                    dpath = os.path.join(dst, os.path.basename(f))
+                    if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst):
+                        shutil.copy(f, dst)
+                        print(f"MockDownload: Copied {f} to {dst}")
+            elif req_pth.endswith("model"):
+                model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.h5"))[0]  # {0} is the literal 0, i.e. the pattern "*Model *0.h5"
+                shutil.copy(model_file, dst)
+            elif "modtraining" in req_pth:
+                model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0]  # pattern "*ModTraining *0.h5"
+                shutil.copy(model_file, dst)
+            else:
+                raise RuntimeError(f"Received unknown request path '{req_pth}'")
+            return {}
+
+        gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+        def mv_to_src(req, src, reference=None):  # fake upload: keep a copy of every uploaded file in tmpdirname
+            shutil.copy(src, tmpdirname)
+            print(f"MockUpload: Copied {src} to {tmpdirname}")
+            # each WSI gets two separate .anot files. The below if statement gives them unique filenames so we can reference later
+            if src.endswith(".anot"):
+                # extract the number at the end of req, which can look like "testfile1" or "testfile1000"
+                m = re.search(r'(\d+)$', req)
+                num = int(m.group(1))
+                s = os.path.basename(src).replace(".anot", f"_{num}.myanot")
+                shutil.copy(src, os.path.join(tmpdirname, s))
+                print(f"Also copied {s} to {tmpdirname}")
+            return {'_id': 'feature', 'name': os.path.basename(src)}
+        gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True)  # return_value is ignored: the side_effect function's dict is returned
+
+        gc.getItem = MagicMock(return_value=mask_item)  # NOTE(review): mask_item is the one left over from the last loop iteration - confirm intended
+
+        modelName = f"{annotation_name} Model Epoch 0.h5"
+        modTrainingName = f"{annotation_name} ModTraining Epoch 0.h5"
+        gc.listResource = MagicMock(return_value=[dict(name=modelName, _id = 'model'), dict(name=modTrainingName, _id = 'modtraining')])
+        gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True)  # return_value is ignored here as well
+        gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id'))
+
+        def list_file(req: str, limit: int = 0) -> iter:  # fake listFile: one entry for either the model or the modtraining file
+            if "modtraining" in req:
+                return iter([dict(name=modTrainingName, _id = 'modtraining')])
+            else:
+                return iter([dict(name=modelName, _id='model')])
+        gc.listFile = MagicMock(side_effect=list_file)
+
+        base.main(args, gc)
+
+        for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))):  # the renamed copies written by mv_to_src
+            assert os.path.exists(file)
+            with open(file, 'r') as f:
+                pred_json = json.load(f)
+                e = pred_json['elements'][0]
+                assert len(e['values']) == NUM_IMAGES_PER_WSI
+
+                assert len(e['user']['bbox']) == NUM_IMAGES_PER_WSI * 4 # 4 is for x,y,w,h
+
+                assert len(e['categories']) == len(mnist_labels) - 1 # exclude the default category
+                assert len(e['user']['confidence']) == NUM_IMAGES_PER_WSI
+
+                # compare e['values'] to labels['values'], to make sure we've trained a valid model
+                # the order of the values is shuffled in the annotation file, the ordering is in e['categories']
+                file_num = int(file.split('Predictions_')[-1].split('.myanot')[0])  # the WSI index that mv_to_src appended to the file name
+                predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']])  # category index -> label string
+                matches = (predicted_labels == np.array(list(map(str, labels[file_num]['value']))))
+                similarity = matches.sum() / len(matches)  # fraction of superpixels predicted correctly
+                expected_min_accuracy = 0.75
+                assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}"
+                print(f"Similarity between predicted values and GT is {similarity}")
+
+@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~1-10 min), run only if you want to")
+@pytest.mark.parametrize('create_sample_data', [2], indirect=True)
+def test_main_tf_with_background(create_sample_data):  # Tensorflow run with one extra unlabeled 1x1 superpixel prepended to each slide
+    global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS  # only read here, never reassigned
+    tiff_paths, tiff_path_pms, num_images, labels = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow()
+
+    annotation_name = 'tensorflowMNISTtest'
+    config = dict(
+        annotationDir = 'annotationdir',
+        annotationName = annotation_name,
+        batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time
+        certainty = 'confidence',
+        cutoff = 600000, # plenty of space to allow all training samples
+        epochs = NUM_EPOCHS,
+        exclude = [],
+        feature = 'patch',
+        features = 'featuredir',
+        gensuperpixels = False,
+        girderApiUrl = 'http://localhost:8080/api/v1',
+        girderToken = '<PASSWORD>',
+        heatmaps = False,
+        images = 'imagedir',
+        labels = '',
+        magnification = 40.0,
+        modeldir = 'modeldir',
+        numWorkers = 1,
+        patchSize = PATCH_SIZE,
+        radius    = MNIST_IMAGE_SIZE,
+        randominput = False,
+        split = 0.7,
+        train = True,
+        useCuda = False,
+        progress = True,
+    )
+    args = argparse.Namespace(**config)  # stands in for the parsed command-line arguments
+
+    mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0']  # list index == label value; the fixture stores digit 0 as value 10
+
+    items = []
+    for i in range(NUM_WSIS):
+        bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()]  # x,y,w,h -> x1,y1,x2,y2
+        elem = {
+            'girderId': f'test_girder_id{i}',
+            'categories': [
+                {"label": c} for c in mnist_labels
+            ],
+            'values': [0] + labels[i]['value'].tolist(),  # leading 0: the extra background entry is unlabeled
+            'user': {
+                'bbox':  [0,0,1,1] + [item for sublist in bboxes for item in sublist]  # leading 1x1 box belongs to the extra background entry
+            },
+            'transform': {'matrix': [[1.0]]}
+        }
+        item = {
+            '_id': f'test_file{i}',
+            'name': os.path.basename(tiff_paths[i]),
+            'largeImage': {'fileId': f'test_image_id{i}'},
+        }
+        mask_item = {
+            '_id': f'test_file{i}',
+            'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]),  # basename with its last ".tiff" removed
+            'largeImage': {'fileId': f'test_mask_id{i}'},
+        }
+        annotrec = {
+            '_id': f'test_file{i}',
+            '_version': 0,
+            'annotation': {'name': 'TorchTest'},  # NOTE(review): same name as in the Torch test although this is the Tensorflow test
+        }
+        items.append((item, annotrec, elem))
+
+
+    gc = MagicMock()
+    base.getItemsAndAnnotations = MagicMock(return_value=items)  # bypass the girder lookup and feed the synthetic items directly
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        def mv_to_dst(req_pth : str, dst : str):  # fake download: picks the file to copy from the requested id/path
+            if req_pth.startswith("test_"):
+                for f in tiff_paths + tiff_path_pms:
+                    dpath = os.path.join(dst, os.path.basename(f))
+                    if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst):  # only the file whose basename matches dst
+                        shutil.copy(f, dst)
+                        print(f"MockDownload: Copied {f} to {dst}")
+            elif req_pth.startswith("feature"):
+                feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5"))
+                for f in feature_files:
+                    dpath = os.path.join(dst, os.path.basename(f))
+                    if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst):
+                        shutil.copy(f, dst)
+                        print(f"MockDownload: Copied {f} to {dst}")
+            elif req_pth.endswith("model"):
+                model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.h5"))[0]  # {0} is the literal 0, i.e. the pattern "*Model *0.h5"
+                shutil.copy(model_file, dst)
+            elif "modtraining" in req_pth:
+                model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0]  # pattern "*ModTraining *0.h5"
+                shutil.copy(model_file, dst)
+            else:
+                raise RuntimeError(f"Received unknown request path '{req_pth}'")
+            return {}
+
+        gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+        def mv_to_src(req, src, reference=None):  # fake upload: keep a copy of every uploaded file in tmpdirname
+            shutil.copy(src, tmpdirname)
+            print(f"MockUpload: Copied {src} to {tmpdirname}")
+            # each WSI gets two separate .anot files. The below if statement gives them unique filenames so we can reference later
+            if src.endswith(".anot"):
+                # extract the number at the end of req, which can look like "testfile1" or "testfile1000"
+                m = re.search(r'(\d+)$', req)
+                num = int(m.group(1))
+                s = os.path.basename(src).replace(".anot", f"_{num}.myanot")
+                shutil.copy(src, os.path.join(tmpdirname, s))
+                print(f"Also copied {s} to {tmpdirname}")
+            return {'_id': 'feature', 'name': os.path.basename(src)}
+        gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True)  # return_value is ignored: the side_effect function's dict is returned
+
+        gc.getItem = MagicMock(return_value=mask_item)  # NOTE(review): mask_item is the one left over from the last loop iteration - confirm intended
+
+        modelName = f"{annotation_name} Model Epoch 0.h5"
+        modTrainingName = f"{annotation_name} ModTraining Epoch 0.h5"
+        gc.listResource = MagicMock(return_value=[dict(name=modelName, _id = 'model'), dict(name=modTrainingName, _id = 'modtraining')])
+        gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True)  # return_value is ignored here as well
+        gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id'))
+
+        def list_file(req: str, limit: int = 0) -> iter:  # fake listFile: one entry for either the model or the modtraining file
+            if "modtraining" in req:
+                return iter([dict(name=modTrainingName, _id = 'modtraining')])
+            else:
+                return iter([dict(name=modelName, _id='model')])
+        gc.listFile = MagicMock(side_effect=list_file)
+
+        base.main(args, gc)
+
+        for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))):  # the renamed copies written by mv_to_src
+            assert os.path.exists(file)
+            with open(file, 'r') as f:
+                pred_json = json.load(f)
+                e = pred_json['elements'][0]
+                assert len(e['values']) == NUM_IMAGES_PER_WSI + 1
+
+                assert len(e['user']['bbox']) == (NUM_IMAGES_PER_WSI + 1) * 4 # 4 is for x,y,w,h
+
+                assert len(e['categories']) == len(mnist_labels) - 1 # exclude the default category
+                assert len(e['user']['confidence']) == (NUM_IMAGES_PER_WSI + 1)
+
+                # compare e['values'] to labels['values'], to make sure we've trained a valid model
+                # the order of the values is shuffled in the annotation file, the ordering is in e['categories']
+                file_num = int(file.split('Predictions_')[-1].split('.myanot')[0])  # the WSI index that mv_to_src appended to the file name
+                predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']])  # category index -> label string
+                assert e['values'][0] == 0, "Background should have prediction 0"
+                matches = (predicted_labels == np.array([e['values'][0]] + list(map(str, labels[file_num]['value']))))  # NOTE(review): first GT entry is an int mixed into a list of str - confirm the comparison is intended
+                similarity = matches.sum() / len(matches)  # fraction of entries (including the background one) that match
+                expected_min_accuracy = 0.75
+                assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}"
+                print(f"Similarity between predicted values and GT is {similarity}")
diff --git a/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py b/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py
new file mode 100644
index 0000000..5fc814f
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py
@@ -0,0 +1,164 @@
+import os
+import shutil
+import sys
+import tempfile
+from unittest.mock import MagicMock
+
+import h5py
+import large_image
+import numpy as np
+import pytest
+from PIL.Image import Image
+from tifffile import tifffile
+
+# make pythonpath work out of the box - although your editor may complain
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from progress_helper import ProgressHelper
+from tests.generate_MNIST_image import create_mnist_image
+
+from xdg_base_dirs import ( xdg_cache_home, )
+
+NUM_IMAGES : int = 9
+IMAGE_SIZE : int = 16 # 16 is the smallest tile size for .TIFFs, although we could operate within a single tile, too.
+COLOR_DIM = 3
+
+
+def d_to_rgb(d):
+    """Unpack the three lowest bytes of ``d`` into ``[r, g, b]``, lowest byte first."""
+    byte_mask = 0xFF
+    # Red sits in bits 0-7, green in bits 8-15 and blue in bits 16-23.
+    return [(d >> offset) & byte_mask for offset in (0, 8, 16)]
+
+@pytest.fixture(scope="session")
+def create_sample_data():
+    '''
+    Create a small tiled RGB TIFF made of solid-colour squares for testing.
+
+    NUM_IMAGES squares of IMAGE_SIZE x IMAGE_SIZE pixels are placed row by row on a
+    square grid, so NUM_IMAGES should be a perfect square.
+
+    The mosaic is written as a tiled TIFF; reduced-resolution levels are added as
+    SubIFDs whenever the mosaic is at least 64 pixels on both sides.
+
+    Yields a tuple (path to the TIFF, images), where images holds one solid-colour
+    image per square.
+    '''
+    # Local import: the module-level `Image` is the PIL class, which has no fromarray().
+    from PIL import Image as pil_image
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # The directory is brand new, so nothing has to be created or removed first.
+        output_filename = os.path.join(tmpdirname, "test.tiff")
+
+        # Arrange the images in a grid (so NUM_IMAGES should be a number with an integer root)
+        tile_rows = tile_cols = int(np.sqrt(NUM_IMAGES))
+        tile_h = tile_w = IMAGE_SIZE
+        base_image = np.zeros((tile_rows * tile_h, tile_cols * tile_w, 3), dtype=np.uint8)
+
+        # One solid colour per image, taken from the combinations of {0, 127, 255}.
+        vals = np.array([0, 127, 255], dtype=np.uint8)
+        colors = np.stack(np.meshgrid(vals, vals, vals), axis=-1).reshape(-1, 3)[:NUM_IMAGES]
+        images = np.tile(colors[:, None, None, :], (1, IMAGE_SIZE, IMAGE_SIZE, 1))
+
+        for idx, img in enumerate(images):
+            r, c = divmod(idx, tile_cols)
+            base_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = img
+
+        # Halve the mosaic while both its height and width are at least 64 pixels.
+        # Only the spatial axes are tested: the channel axis (3) would always stop the loop.
+        pyramid = [base_image]
+        current = base_image
+        while min(current.shape[:2]) >= 64:
+            new_w, new_h = current.shape[1] // 2, current.shape[0] // 2
+            current = np.array(pil_image.fromarray(current).resize((new_w, new_h)))
+            pyramid.append(current)
+
+        # Save the image as a pyramidal TIFF.
+        # Level 0 declares how many SubIFDs follow; every reduced level is then written
+        # with subfiletype=1 so that tifffile stores it as a SubIFD of the base page.
+        tiff_options = dict(tile=(tile_w * 4, tile_h * 4), photometric='RGB')
+        with tifffile.TiffWriter(output_filename, bigtiff=False) as tif:
+            tif.write(pyramid[0],
+                      description='Whole-slide MNIST image at 40x magnification',
+                      subifds=len(pyramid) - 1,
+                      **tiff_options)
+            for level in pyramid[1:]:
+                tif.write(level, subfiletype=1, **tiff_options)
+        print(f"Pyramidal TIFF saved as {output_filename}")
+
+        # we use yield so that the temporarydirectory is still open in the tests
+        yield output_filename, images
+
+def test_gen_superpixel(create_sample_data):  # runs createSuperpixelsForItem on the fixture TIFF with a mocked girder client
+    global IMAGE_SIZE, COLOR_DIM  # not required: neither name is assigned in this function
+    test_image_pth, test_images = create_sample_data
+    base = SuperpixelClassificationBase()
+
+    # Item record passed to createSuperpixelsForItem; 'name' is the path of the fixture TIFF
+    item = {
+        "_id": "test_item_id",
+        'largeImage': {'fileId': 'test_image_id'},
+        'name': test_image_pth,
+    }
+
+    # Mock girder client
+    gc = MagicMock()
+    def mv_to_dst(_, dst):  # stands in for downloadFile: copies the fixture TIFF to dst
+        if not os.path.exists(os.path.join(dst, test_image_pth)):  # NOTE(review): if test_image_pth is absolute, join() returns it unchanged and this is never true - confirm
+            shutil.copy(test_image_pth, dst)
+            print(">>> Copied file from", test_image_pth, "to", dst)
+        return None
+    gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+    gc.getItem = MagicMock(return_value={'name': test_image_pth, 'largeImage': {'fileId': 'foobar'}})
+    def mv_to_src(_, src):  # stands in for uploadFileToFolder: copies src next to the fixture TIFF
+        dst = os.path.dirname(test_image_pth)
+        if not os.path.exists(os.path.join(dst, src)):
+            shutil.copy(src, dst)
+            print(">>> Copied file from", src, "to", dst)
+        return {'itemId': 'uploaded_item_id'}
+    gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'})
+    # The value returned by side_effect (mv_to_src) takes precedence, so callers never receive return_value.
+
+    # NOTE(review): bboxes is not used below; range(0, NUM_IMAGES, IMAGE_SIZE) yields only 0 for the current constants.
+    bboxes = [[x, x, x + IMAGE_SIZE, x + IMAGE_SIZE] for x in range(0, NUM_IMAGES, IMAGE_SIZE)]
+
+    with ProgressHelper( 'Superpixel Classification',
+                         'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items([item])
+        result = base.createSuperpixelsForItem(
+            gc=gc,
+            annotationName="TorchTest",
+            item=item,
+            radius=IMAGE_SIZE,
+            magnification=40,
+            annotationFolderId='annotation_folder_id',
+            userId="user_id",
+            prog=prog,
+        )
+
+    out_pixelmap_file = os.path.join(os.path.dirname(test_image_pth), '%s.pixelmap.tiff' % item['name'])
+    assert os.path.exists(out_pixelmap_file), f"Output file {out_pixelmap_file} does not exist"
+    x, y, x2, y2 = 0, 0, IMAGE_SIZE, IMAGE_SIZE
+    ts = large_image.getTileSource(test_image_pth)
+    orig_image = ts.getRegion(
+        region=dict(left=x, top=y, right=x2, bottom=y2),
+        format=large_image.tilesource.TILE_FORMAT_NUMPY
+    )[0]
+    # The top-left square is read from the source TIFF (not the pixelmap) and is currently expected to be all zeros.
+    # TODO: waiting for another PR: want this to be 1
+    assert np.all(orig_image == 0)
+
+    feature_img = test_images[-1]
+    x, y, x2, y2 = IMAGE_SIZE * (IMAGE_SIZE - 1), IMAGE_SIZE * (IMAGE_SIZE - 1), IMAGE_SIZE * IMAGE_SIZE, IMAGE_SIZE * IMAGE_SIZE
+    ts = large_image.getTileSource(test_image_pth)
+    orig_image = ts.getRegion(
+        region=dict(left=x, top=y, right=x2, bottom=y2),
+        format=large_image.tilesource.TILE_FORMAT_NUMPY
+    )[0]
+    orig_image = orig_image.astype(feature_img.dtype)
+    # TODO: same as TODO above. NOTE(review): this region starts at 240 but the fixture mosaic is 48x48 - confirm any pixels are read.
+    assert np.all(orig_image == NUM_IMAGES - 1)
\ No newline at end of file
diff --git a/superpixel_classification/SuperpixelClassification/tests/test_predict.py b/superpixel_classification/SuperpixelClassification/tests/test_predict.py
new file mode 100644
index 0000000..9341a90
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/test_predict.py
@@ -0,0 +1,254 @@
+import json
+import os
+import shutil
+import tempfile
+from unittest.mock import MagicMock
+
+import h5py
+import numpy as np
+import pytest
+import torch
+
+# make pythonpath work out of the box - although your editor may complain
+import sys
+import os
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from SuperpixelClassificationTorch import SuperpixelClassificationTorch, _BayesianPatchTorchModel
+from progress_helper import ProgressHelper
+from tests.validate_json_annotation import validate_json_file
+
+# currently, torch model only supports 100x100
+MNIST_IMAGE_SIZE=100
+COLOR_DIM = 3
+NUM_IMAGES = 64
+CUTOFF_IMAGES = 2
+
+@pytest.fixture(scope="session")
+def create_sample_data():
+    """Yield the path of an HDF5 file with random images and used_indices for all but the last CUTOFF_IMAGES entries."""
+    kept = NUM_IMAGES - CUTOFF_IMAGES
+    patch_shape = (kept, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM)
+    with tempfile.TemporaryDirectory() as workdir:
+        random_patches = np.random.randint(0, 255, size=patch_shape, dtype=np.uint8)
+        kept_indices = np.arange(kept)
+        assert random_patches.shape[0] == kept_indices.shape[0]
+
+        h5_path = os.path.join(workdir, "test_data.h5")
+        with h5py.File(h5_path, 'w') as h5_file:
+            h5_file.create_dataset('images', data=random_patches)
+            h5_file.create_dataset('used_indices', data=kept_indices, dtype='i')
+        # Yield rather than return, so the temporary directory still exists while the tests run.
+        yield h5_path
+
+'''
+This test checks predictions on a dataset that is labeled with only two out of ten categories.
+'''
+def test_subset_labels(create_sample_data):
+    """Predict with a freshly constructed model when only labels 1 and 3 are annotated, then validate both output files."""
+    h5_path = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTorch()
+    base.certainty = 'batchbald'
+    base.feature_is_image = True
+    # Mock girder client: every download hands out a copy of the fixture's HDF5 file
+    gc = MagicMock()
+    def mv_to_dst(_, dst):
+        return shutil.copy(h5_path, dst)
+    gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+    gc.uploadFileToItem = MagicMock()
+
+    feature = {
+        '_id': '0',
+        'name': 'my_test_feature'
+    }
+    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
+    annotrec = {
+        'annotation': {
+            'attributes': {},
+            'name': 'TorchTest',
+        },
+    }
+
+    # NUM_IMAGES entries that alternate between the values 1 and 3
+    value_list = [1, 3] * (NUM_IMAGES // 2)
+
+    elem = {
+        "type": "pixelmap",
+        "girderId": "6838aab654f0ca783ff03871",
+        "transform": {"matrix": [[1.0, 0], [0, 1.0]]},
+        'values': value_list,
+        'categories' : [{"label": k, "fillColor": "rgba(0,0,0,0)"} for k in labels],
+        "boundaries": True,
+        "id": "myid",
+        'user': { },
+    }
+
+    groups = { k: {"label": k, "fillColor": "rgba(0,0,0,0)", "strokeColor": "rgba(0,0,0,0)" } for k in labels }
+
+    device = torch.device("cpu")
+    model = _BayesianPatchTorchModel(len(labels), device)
+    model.device = device
+
+    items = [(feature, annotrec, elem)]
+    item = {'_id': 0, 'name': 'my_item', 'largeImage': {'fileId': 'test_image_id'}}
+    with ProgressHelper( 'Superpixel Classification',
+                         'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items(items)
+
+        annotation_name = 'testannotation'
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            base.predictLabelsForItem(
+                gc=gc,
+                annotationName=annotation_name,
+                tempdir=tmpdirname,
+                model=model,
+                item=item,
+                annotrec=annotrec,
+                elem=elem,
+                feature=feature,
+                curEpoch=0,
+                userId='user_id',
+                labels=labels,
+                groups=groups,
+                makeHeatmaps=False,
+                radius=-1,
+                magnification=40.0,
+                certainty='batchbald',
+                batchSize=NUM_IMAGES,
+                use_cuda = False,
+                prog=prog,
+            )
+            out_pth = os.path.join(tmpdirname, '%s Epoch 0 Predictions.anot' % annotation_name)
+            assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth
+            with open(out_pth, 'r') as f:
+                pred_json = json.load(f)
+                e = pred_json['elements'][0]
+                assert len(e['values']) == NUM_IMAGES
+                for i in range(1, CUTOFF_IMAGES + 1):  # negative indices -1 .. -CUTOFF_IMAGES: every entry missing from used_indices
+                    assert e['values'][-i] == 0, "Expected unknown/none label for cutoff images"
+                assert len(e['categories']) == len(labels)
+                assert len(e['user']['confidence']) == NUM_IMAGES
+                assert len(e['user']['categoryConfidence']) == NUM_IMAGES
+                assert len(e['user']['categoryConfidence'][0]) == len(labels)
+                assert len(e['user']['certainty']) == NUM_IMAGES
+                for i in range(1, CUTOFF_IMAGES + 1):  # same trailing entries as above
+                    assert e['user']['certainty'][-i] > 10000, "Expected certainty to be very high for unlabeled samples to ensure they occur last in the AL filmstrip (DSA)"
+                assert 'percentiles' in e['user']['certainty_info']
+                assert 'cdf' in e['user']['certainty_info']
+
+            validate_json_file(out_pth)
+
+            out_pth = os.path.join(tmpdirname, '%s Epoch 1.anot' % annotation_name)
+            assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth
+            with open(out_pth, 'r') as f:
+                annotation_file = json.load(f)
+                e = annotation_file['elements'][0]
+                assert len(e['values']) == NUM_IMAGES
+                assert len(e['categories']) == len(labels)
+
+            validate_json_file(out_pth)
+
+def test_predict_unlabeled_with_cutoff(create_sample_data):
+    """Predict with a freshly constructed model when every value is 0 (nothing annotated), then validate both output files."""
+    h5_path = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTorch()
+    base.certainty = 'batchbald'
+    base.feature_is_image = True
+    # Mock girder client: every download hands out a copy of the fixture's HDF5 file
+    gc = MagicMock()
+    def mv_to_dst(_, dst):
+        return shutil.copy(h5_path, dst)
+    gc.downloadFile = MagicMock(side_effect=mv_to_dst)
+    gc.uploadFileToItem = MagicMock()
+
+    feature = {
+        '_id': '0',
+        'name': 'my_test_feature'
+    }
+    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
+    annotrec = {
+        'annotation': {
+            'attributes': {},
+            'name': 'TorchTest',
+        },
+    }
+
+    elem = {
+        "type": "pixelmap",
+        "girderId": "6838aab654f0ca783ff03871",
+        "transform": {"matrix": [[1.0, 0], [0, 1.0]]},
+        'values': [0] * NUM_IMAGES,
+        'categories' : [{"label": k, "fillColor": "rgba(0,0,0,0)"} for k in labels],
+        "boundaries": True,
+        "id": "myid",
+        'user': { },
+    }
+
+    groups = { k: {"label": k, "fillColor": "rgba(0,0,0,0)", "strokeColor": "rgba(0,0,0,0)" } for k in labels }
+
+    device = torch.device("cpu")
+    model = _BayesianPatchTorchModel(len(labels), device)
+    model.device = device
+
+    items = [(feature, annotrec, elem)]
+    item = {'_id': 0, 'name': 'my_item', 'largeImage': {'fileId': 'test_image_id'}}
+    with ProgressHelper( 'Superpixel Classification',
+                         'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items(items)
+
+        annotation_name = 'testannotation'
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            base.predictLabelsForItem(
+                gc=gc,
+                annotationName=annotation_name,
+                tempdir=tmpdirname,
+                model=model,
+                item=item,
+                annotrec=annotrec,
+                elem=elem,
+                feature=feature,
+                curEpoch=0,
+                userId='user_id',
+                labels=labels,
+                groups=groups,
+                makeHeatmaps=False,
+                radius=-1,
+                magnification=40.0,
+                certainty='batchbald',
+                batchSize=NUM_IMAGES,
+                use_cuda = False,
+                prog=prog,
+            )
+            out_pth = os.path.join(tmpdirname, '%s Epoch 0 Predictions.anot' % annotation_name)
+            assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth
+            with open(out_pth, 'r') as f:
+                pred_json = json.load(f)
+                e = pred_json['elements'][0]
+                assert len(e['values']) == NUM_IMAGES
+                for i in range(1, CUTOFF_IMAGES + 1):  # negative indices -1 .. -CUTOFF_IMAGES: every entry missing from used_indices
+                    assert e['values'][-i] == 0, "Expected unknown/none label for cutoff images"
+                assert len(e['categories']) == len(labels)
+                assert len(e['user']['confidence']) == NUM_IMAGES
+                assert len(e['user']['categoryConfidence']) == NUM_IMAGES
+                assert len(e['user']['categoryConfidence'][0]) == len(labels)
+                assert len(e['user']['certainty']) == NUM_IMAGES
+                for i in range(1, CUTOFF_IMAGES + 1):  # same trailing entries as above
+                    assert e['user']['certainty'][-i] > 10000, "Expected certainty to be very high for unlabeled samples to ensure they occur last in the AL filmstrip (DSA)"
+                assert 'percentiles' in e['user']['certainty_info']
+                assert 'cdf' in e['user']['certainty_info']
+
+            validate_json_file(out_pth)
+
+            out_pth = os.path.join(tmpdirname, '%s Epoch 1.anot' % annotation_name)
+            assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth
+            with open(out_pth, 'r') as f:
+                annotation_file = json.load(f)
+                e = annotation_file['elements'][0]
+                assert len(e['values']) == NUM_IMAGES
+                assert len(e['categories']) == len(labels)
+
+            validate_json_file(out_pth)
diff --git a/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py b/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py
new file mode 100644
index 0000000..1a40365
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py
@@ -0,0 +1,93 @@
+import os
+import shutil
+import tempfile
+from unittest.mock import MagicMock
+
+import h5py
+import numpy as np
+import pytest
+
+# make pythonpath work out of the box - although your editor may complain
+import sys
+import os
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from SuperpixelClassificationTensorflow import SuperpixelClassificationTensorflow
+from progress_helper import ProgressHelper
+
+MNIST_IMAGE_SIZE=28
+COLOR_DIM = 3
+NUM_IMAGES = 64
+
+@pytest.fixture(scope="session")
+def create_sample_data():
+    """Yield the path of an HDF5 file holding NUM_IMAGES random images and used_indices for all but the last two."""
+    patch_shape = (NUM_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM)
+    with tempfile.TemporaryDirectory() as workdir:
+        random_patches = np.random.randint(0, 255, size=patch_shape, dtype=np.uint8)
+        used = np.arange(NUM_IMAGES - 2)
+
+        h5_path = os.path.join(workdir, "test_data.h5")
+        with h5py.File(h5_path, 'w') as h5_file:
+            h5_file.create_dataset('images', data=random_patches)
+            h5_file.create_dataset('used_indices', data=used)
+        yield h5_path  # yield (not return) keeps the temporary directory alive during the tests
+
+def test_train_model(create_sample_data):
+    """
+    Train the TensorFlow classifier for one epoch on random images and check that
+    the two returned model files exist.
+    """
+    h5_path = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow()
+    base.feature_is_image = True
+    base.certainty = 'not batchbald' # same as using tensorflow
+
+    # Girder client stand-in: downloads hand out the fixture's HDF5 file, uploads are
+    # copied to the parent of the fixture's temporary directory.
+    def fake_download(_, dst):
+        return shutil.copy(h5_path, dst)
+    def fake_upload(_, src):
+        upload_dir = os.path.dirname(os.path.dirname(h5_path))
+        return shutil.copy(src, upload_dir)
+    gc = MagicMock()
+    gc.downloadFile = MagicMock(side_effect=fake_download)
+    gc.uploadFileToFolder = MagicMock(side_effect=fake_upload, return_value=True)
+
+    labels = [str(digit) for digit in range(10)]
+    # Random labels for all but the last two images; the trailing zeros leave those unlabeled.
+    labeled_values = np.random.randint(1, len(labels) - 1, size=(NUM_IMAGES - 2), dtype=np.uint8).tolist()
+    elem = {
+        'girderId': 'test_girder_id',
+        'categories': [{"label": c} for c in labels],
+        'values': labeled_values + [0, 0],
+        'transform': {'matrix': [[1.0]]},
+    }
+
+    item = {'_id': 'test_h5_file', 'name': 'test'}
+    annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}}
+    items = [(item, annotrec, elem)]
+    train_kwargs = dict(
+        annotationName="TorchTest",
+        batchSize=4,
+        epochs=1,
+        excludeLabelList=[],
+        features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}},
+        gc=gc,
+        itemsAndAnnot=items,
+        labelList=list(labels),
+        modelFolderId="test_folder_id",
+        randomInput=False,
+        trainingSplit=0.5,
+        use_cuda=False,
+    )
+    with ProgressHelper('Superpixel Classification', 'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items(items)
+        modelFile, modelTrainingFile = base.trainModel(prog=prog, **train_kwargs)
+
+    # Both returned values are expected to be paths of existing files.
+    assert os.path.exists(modelFile)
+    assert os.path.exists(modelTrainingFile)
\ No newline at end of file
diff --git a/superpixel_classification/SuperpixelClassification/tests/test_torch.py b/superpixel_classification/SuperpixelClassification/tests/test_torch.py
new file mode 100644
index 0000000..edb7dbc
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/test_torch.py
@@ -0,0 +1,94 @@
+import os
+import shutil
+import tempfile
+from unittest.mock import MagicMock
+
+import h5py
+import numpy as np
+import pytest
+
+# make pythonpath work out of the box - although your editor may complain
+import sys
+import os
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.dirname(SCRIPT_DIR))
+
+from SuperpixelClassificationBase import SuperpixelClassificationBase
+from SuperpixelClassificationTorch import SuperpixelClassificationTorch
+from progress_helper import ProgressHelper
+
+# currently, torch model only supports 100x100
+MNIST_IMAGE_SIZE=100
+COLOR_DIM = 3
+NUM_IMAGES = 64
+
+@pytest.fixture(scope="session")
+def create_sample_data():
+    """Yield the path of an HDF5 file holding NUM_IMAGES random images and used_indices for all but the last two."""
+    patch_shape = (NUM_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM)
+    with tempfile.TemporaryDirectory() as workdir:
+        random_patches = np.random.randint(0, 255, size=patch_shape, dtype=np.uint8)
+        used = np.arange(NUM_IMAGES - 2)
+
+        h5_path = os.path.join(workdir, "test_data.h5")
+        with h5py.File(h5_path, 'w') as h5_file:
+            h5_file.create_dataset('images', data=random_patches)
+            h5_file.create_dataset('used_indices', data=used)
+        yield h5_path  # yield (not return) keeps the temporary directory alive during the tests
+
+def test_train_model(create_sample_data):
+    """
+    Train the Torch classifier for one epoch on random images and check that
+    the two returned model files exist.
+    """
+    h5_path = create_sample_data
+    base: SuperpixelClassificationBase = SuperpixelClassificationTorch()
+    base.feature_is_image = True
+    base.certainty = 'batchbald' # same as using torch
+
+    # Girder client stand-in: downloads hand out the fixture's HDF5 file, uploads are
+    # copied to the parent of the fixture's temporary directory.
+    def fake_download(_, dst):
+        return shutil.copy(h5_path, dst)
+    def fake_upload(_, src):
+        upload_dir = os.path.dirname(os.path.dirname(h5_path))
+        return shutil.copy(src, upload_dir)
+    gc = MagicMock()
+    gc.downloadFile = MagicMock(side_effect=fake_download)
+    gc.uploadFileToFolder = MagicMock(side_effect=fake_upload, return_value=True)
+
+    labels = [str(digit) for digit in range(10)]
+    # Random labels for all but the last two images; the trailing zeros leave those unlabeled.
+    labeled_values = np.random.randint(1, len(labels) - 1, size=(NUM_IMAGES - 2), dtype=np.uint8).tolist()
+    elem = {
+        'girderId': 'test_girder_id',
+        'categories': [{"label": c} for c in labels],
+        'values': labeled_values + [0, 0],
+        'transform': {'matrix': [[1.0]]},
+    }
+
+    item = {'_id': 'test_h5_file', 'name': 'test'}
+    annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}}
+    items = [(item, annotrec, elem)]
+    train_kwargs = dict(
+        annotationName="TorchTest",
+        batchSize=4,
+        epochs=1,
+        excludeLabelList=[],
+        features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}},
+        gc=gc,
+        itemsAndAnnot=items,
+        labelList=list(labels),
+        modelFolderId="test_folder_id",
+        randomInput=False,
+        trainingSplit=0.5,
+        use_cuda=True,  # NOTE(review): requests CUDA - confirm trainModel copes with machines that have no GPU
+    )
+    with ProgressHelper('Superpixel Classification', 'Test feature', False) as prog:
+        prog.progress(0)
+        prog.items(items)
+        modelFile, modelTrainingFile = base.trainModel(prog=prog, **train_kwargs)
+
+    # Both returned values are expected to be paths of existing files.
+    assert os.path.exists(modelFile)
+    assert os.path.exists(modelTrainingFile)
diff --git a/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py b/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py
new file mode 100644
index 0000000..5f209ce
--- /dev/null
+++ b/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py
@@ -0,0 +1,588 @@
+#!/usr/bin/env python
+'''
+This code is similar to girder_annotation/girder_large_image_annotation/models/annotation.py
+The meaning is to validate the json annotation file without having to use girder or large_image
+'''
+import argparse
+import json
+import logging
+import os
+import sys
+import jsonschema
+from tqdm import tqdm
+
+import copy
+
+def extendSchema(base, add):
+    """Return a deep copy of ``base`` extended by ``add``: 'required' lists are unioned and sorted, 'properties' merged."""
+    merged = copy.deepcopy(base)
+    if 'properties' in base:  # other keys of ``add`` are only taken over when base declares 'properties'
+        merged.update({key: value for key, value in add.items() if key != 'properties'})
+    if 'required' in base and 'required' in add:
+        merged['required'] = sorted(set(base['required']) | set(add['required']))
+    if 'properties' in add:
+        merged['properties'].update(add['properties'])
+    return merged
+
+
+colorSchema = {
+    'type': 'string',
+    # We accept colors of the form
+    #   #rrggbb                 six digit RRGGBB hex
+    #   #rgb                    three digit RGB hex
+    #   #rrggbbaa               eight digit RRGGBBAA hex
+    #   #rgba                   four digit RGBA hex
+    #   rgb(255, 255, 255)      rgb decimal triplet
+    #   rgba(255, 255, 255, 1)  rgba quad with RGB in the range [0-255] and
+    #                           alpha [0-1]
+    'pattern': r'^(#([0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|'
+               r'rgb\(\d+,\s*\d+,\s*\d+\)|'
+               r'rgba\(\d+,\s*\d+,\s*\d+,\s*(\d?\.|)\d+\))$',
+}
+
+transformArray = {
+    'type': 'array',
+    'items': {
+        'type': 'array',
+        'minItems': 2,
+        'maxItems': 2,
+    },
+    'minItems': 2,
+    'maxItems': 2,
+    'description': 'A 2D matrix representing the transform of an '
+                    'image overlay.',
+}
+
+
+colorRangeSchema = {
+    'type': 'array',
+    'items': colorSchema,
+    'description': 'A list of colors',
+}
+
+rangeValueSchema = {
+    'type': 'array',
+    'items': {'type': 'number'},
+    'description': 'A weakly monotonic list of range values',
+}
+
+userSchema = {
+    'type': 'object',
+    'additionalProperties': True,
+}
+
+labelSchema = {
+    'type': 'object',
+    'properties': {
+        'value': {'type': 'string'},
+        'visibility': {
+            'type': 'string',
+            # TODO: change to True, False, None?
+            'enum': ['hidden', 'always', 'onhover'],
+        },
+        'fontSize': {
+            'type': 'number',
+            'exclusiveMinimum': 0,
+        },
+        'color': colorSchema,
+    },
+    'required': ['value'],
+    'additionalProperties': False,
+}
+
+groupSchema = {'type': 'string'}
+
+baseElementSchema = {
+    'type': 'object',
+    'properties': {
+        'id': {
+            'type': 'string',
+            'pattern': '^[0-9a-f]{24}$',
+        },
+        'type': {'type': 'string'},
+        # schema free field for users to extend annotations
+        'user': userSchema,
+        'label': labelSchema,
+        'group': groupSchema,
+    },
+    'required': ['type'],
+    'additionalProperties': True,
+}
+baseShapeSchema = extendSchema(baseElementSchema, {
+    'properties': {
+        'lineColor': colorSchema,
+        'lineWidth': {
+            'type': 'number',
+            'minimum': 0,
+        },
+    },
+})
+
+
+pixelmapCategorySchema = {
+    'type': 'object',
+    'properties': {
+        'fillColor': colorSchema,
+        'strokeColor': colorSchema,
+        'label': {
+            'type': 'string',
+            'description': 'A string representing the semantic '
+                           'meaning of regions of the map with '
+                           'the corresponding color.',
+        },
+        'description': {
+            'type': 'string',
+            'description': 'A more detailed explanation of the '
+                           'meaining of this category.',
+        },
+    },
+    'required': ['fillColor'],
+    'additionalProperties': False,
+}
+
+_annotationSchema = {  # object schema with colour-pattern 'value'/'id' fields plus free-text 'label' and 'description'
+    'type': 'object',
+    'properties': {
+        'value': colorSchema,
+        'id': colorSchema,
+        'label': {
+            'type': 'string',
+            'description': 'A string representing the semantic '
+                           'meaning of regions of the map with '
+                           'the corresponding color.',
+        },
+        'description': {
+            'type': 'string',
+            'description': 'A more detailed explanation of the '
+                           'meaining of this category.',
+        },
+    },
+    'required': ['fillColor'],  # NOTE(review): 'fillColor' is not declared in 'properties'; with additionalProperties False no object can satisfy both - confirm intent
+    'additionalProperties': False,
+}
+
+
+overlaySchema = extendSchema(baseElementSchema, {
+    'properties': {
+        'type': {
+            'type': 'string',
+            'enum': ['image'],
+        },
+        'girderId': {
+            'type': 'string',
+            'pattern': '^[0-9a-f]{24}$',
+            'description': 'Girder item ID containing the image to '
+                            'overlay.',
+        },
+        'opacity': {
+            'type': 'number',
+            'minimum': 0,
+            'maximum': 1,
+            'description': 'Default opacity for this image overlay. Must '
+                            'be between 0 and 1. Defaults to 1.',
+        },
+        'hasAlpha': {
+            'type': 'boolean',
+            'description':
+                'If true, the image is treated assuming it has an alpha '
+                'channel.',
+        },
+        'transform': {
+            'type': 'object',
+            'description': 'Specification for an affine transform of the '
+                            'image overlay. Includes a 2D transform matrix, '
+                            'an X offset and a Y offset.',
+            'properties': {
+                'xoffset': {
+                    'type': 'number',
+                },
+                'yoffset': {
+                    'type': 'number',
+                },
+                'matrix': transformArray,
+            },
+        },
+    },
+    'required': ['girderId', 'type'],
+    'additionalProperties': False,
+    'description': 'An image overlay on top of the base resource.',
+})
+
+
+# Schema for a 'pixelmap' overlay element, built by extending overlaySchema with values/categories/boundaries.
+pixelmapSchema = extendSchema(overlaySchema, {
+    'properties': {
+        'type': {
+            'type': 'string',
+            'enum': ['pixelmap'],
+        },
+        'values': {
+            'type': 'array',
+            'items': {'type': 'integer'},
+            'description': 'An array where the indices '
+                           'correspond to pixel values in the '
+                           'pixel map image and the values are '
+                           'used to look up the appropriate '
+                           'color in the categories property.',
+        },
+        'categories': {
+            'type': 'array',
+            'items': pixelmapCategorySchema,
+            'description': 'An array used to map between the '
+                           'values array and color values. '
+                           'Can also contain semantic '
+                           'information for color values.',
+        },
+        'boundaries': {
+            'type': 'boolean',
+            'description': 'True if the pixelmap doubles pixel '
+                           'values such that even values are the '
+                           'fill and odd values the are stroke '
+                           'of each superpixel. If true, the '
+                           'length of the values array should be '
+                           'half of the maximum value in the '
+                           'pixelmap.',
+        },
+    },
+    'required': ['values', 'categories', 'boundaries'],
+    'additionalProperties': False,
+    'description': 'A tiled pixelmap to overlay onto a base resource.',
+})
+
+# Schema for a 'bboxmap' overlay element: categories plus one value/id/bbox record per annotation.
+bboxSchema = extendSchema(overlaySchema, {
+    'properties': {
+        'type': {
+            'type': 'string',
+            'enum': ['bboxmap'],
+        },
+        'categories': {
+            'type': 'array',
+            'items': pixelmapCategorySchema,
+            'description': 'An array used to map between the '
+                           'values array and color values. '
+                           'Can also contain semantic '
+                           'information for color values.',
+        },
+        'annotations': {
+            'type': 'array',
+            'description': 'Value, id, and bounding box for each annotation',
+            'items': {
+                'type': 'object',
+                'additionalProperties': False,
+                'properties': {
+                    'value': {
+                        'type': 'integer',
+                    },
+                    'id': {
+                        'type': 'integer',
+                    },
+                    'bbox': {
+                        'type': 'array',
+                        'items': {'type': 'number'},
+                        'minItems': 4,
+                        'maxItems': 4,
+                        'description': 'Bounding box in the form '
+                                       '[left, top, right, bottom].',
+                    },
+                },
+            },
+        },
+        'boundaries': {
+            'type': 'boolean',
+            'description': 'True if the pixelmap doubles pixel '
+                           'values such that even values are the '
+                           'fill and odd values the are stroke '
+                           'of each superpixel. If true, the '
+                           'length of the values array should be '
+                           'half of the maximum value in the '
+                           'pixelmap.',
+        },
+    },
+    'required': ['categories', 'boundaries', 'annotations'],
+    'additionalProperties': True,
+    'description': 'A set of bounding-box annotations to overlay onto a base resource.',
+})
+
+annotationElementSchema = {
+    # An element must match the pixelmap or the bboxmap schema. The subtypes are
+    # mutually exclusive, so for efficiency 'anyOf' is used rather than 'oneOf'.
+    'anyOf': [
+        pixelmapSchema,
+        bboxSchema,
+    ],
+}
+
+
+class AnnotationSchema:
+    """Namespace for the JSON-schema dicts describing an annotation and its elements."""
+    annotationSchema = {
+        '$schema': 'http://json-schema.org/schema#',
+        'type': 'object',
+        'properties': {
+            'name': {
+                'type': 'string',
+                # TODO: Disallow empty?
+                'minLength': 1,
+            },
+            'description': {'type': 'string'},
+            'display': {
+                'type': 'object',
+                'properties': {
+                    'visible': {
+                        'type': ['boolean', 'string'],
+                        'enum': ['new', True, False],
+                        'description': 'This advises viewers on when the '
+                        'annotation should be shown.  If "new" (the default), '
+                        'show the annotation when it is first added to the '
+                        "system.  If false, don't show the annotation by "
+                        'default.  If true, show the annotation when the item '
+                        'is displayed.',
+                    },
+                },
+            },
+            'attributes': {
+                'type': 'object',
+                'additionalProperties': True,
+                'title': 'Image Attributes',
+                'description': 'Subjective things that apply to the entire '
+                               'image.',
+            },
+            'elements': {
+                'type': 'array',
+                'items': annotationElementSchema,
+                # We want to ensure unique element IDs, if they are set.  If
+                # they are not set, we assign them from Mongo.
+                'title': 'Image Markup',
+                'description': 'Subjective things that apply to a '
+                               'spatial region.',
+            },
+        },
+        'additionalProperties': False,
+    }
+
+    # Reusable fragments (coordinates, colors, labels) and the shape schemas built from them.
+    coordSchema = {
+        'type': 'array',
+        # TODO: validate that z==0 for now
+        'items': {
+            'type': 'number',
+        },
+        'minItems': 3,
+        'maxItems': 3,
+        'name': 'Coordinate',
+        # TODO: define origin for 3D images
+        'description': 'An X, Y, Z coordinate tuple, in base layer pixel '
+                       'coordinates, where the origin is the upper-left.',
+    }
+    coordValueSchema = {
+        'type': 'array',
+        'items': {
+            'type': 'number',
+        },
+        'minItems': 4,
+        'maxItems': 4,
+        'name': 'CoordinateWithValue',
+        'description': 'An X, Y, Z, value coordinate tuple, in base layer '
+                       'pixel coordinates, where the origin is the upper-left.',
+    }
+
+    colorSchema = {
+        'type': 'string',
+        # We accept colors of the form
+        #   #rrggbb                 six digit RRGGBB hex
+        #   #rgb                    three digit RGB hex
+        #   #rrggbbaa               eight digit RRGGBBAA hex
+        #   #rgba                   four digit RGBA hex
+        #   rgb(255, 255, 255)      rgb decimal triplet
+        #   rgba(255, 255, 255, 1)  rgba quad with RGB in the range [0-255] and
+        #                           alpha [0-1]
+        # TODO: make rgb and rgba spec validate that rgb is [0-255] and a is
+        # [0-1], rather than just checking if they are digits and such.
+        'pattern': r'^(#([0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|'
+                   r'rgb\(\d+,\s*\d+,\s*\d+\)|'
+                   r'rgba\(\d+,\s*\d+,\s*\d+,\s*(\d?\.|)\d+\))$',
+    }
+
+    colorRangeSchema = {
+        'type': 'array',
+        'items': colorSchema,
+        'description': 'A list of colors',
+    }
+
+    rangeValueSchema = {
+        'type': 'array',
+        'items': {'type': 'number'},
+        'description': 'A weakly monotonic list of range values',
+    }
+
+    userSchema = {
+        'type': 'object',
+        'additionalProperties': True,
+    }
+
+    labelSchema = {
+        'type': 'object',
+        'properties': {
+            'value': {'type': 'string'},
+            'visibility': {
+                'type': 'string',
+                # TODO: change to True, False, None?
+                'enum': ['hidden', 'always', 'onhover'],
+            },
+            'fontSize': {
+                'type': 'number',
+                'exclusiveMinimum': 0,
+            },
+            'color': colorSchema,
+        },
+        'required': ['value'],
+        'additionalProperties': False,
+    }
+
+    groupSchema = {'type': 'string'}
+
+    baseElementSchema = {
+        'type': 'object',
+        'properties': {
+            'id': {
+                'type': 'string',
+                'pattern': '^[0-9a-f]{24}$',
+            },
+            'type': {'type': 'string'},
+            # schema free field for users to extend annotations
+            'user': userSchema,
+            'label': labelSchema,
+            'group': groupSchema,
+        },
+        'required': ['type'],
+        'additionalProperties': True,
+    }
+    baseShapeSchema = extendSchema(baseElementSchema, {
+        'properties': {
+            'lineColor': colorSchema,
+            'lineWidth': {
+                'type': 'number',
+                'minimum': 0,
+            },
+        },
+    })
+
+    pointShapeSchema = extendSchema(baseShapeSchema, {
+        'properties': {
+            'type': {
+                'type': 'string',
+                'enum': ['point'],
+            },
+            'center': coordSchema,
+            'fillColor': colorSchema,
+        },
+        'required': ['type', 'center'],
+        'additionalProperties': False,
+    })
+
+    arrowShapeSchema = extendSchema(baseShapeSchema, {
+        'properties': {
+            'type': {
+                'type': 'string',
+                'enum': ['arrow'],
+            },
+            'points': {
+                'type': 'array',
+                'items': coordSchema,
+                'minItems': 2,
+                'maxItems': 2,
+            },
+            'fillColor': colorSchema,
+        },
+        'description': 'The first point is the head of the arrow',
+        'required': ['type', 'points'],
+        'additionalProperties': False,
+    })
+
+    circleShapeSchema = extendSchema(baseShapeSchema, {
+        'properties': {
+            'type': {
+                'type': 'string',
+                'enum': ['circle'],
+            },
+            'center': coordSchema,
+            'radius': {
+                'type': 'number',
+                'minimum': 0,
+            },
+            'fillColor': colorSchema,
+        },
+        'required': ['type', 'center', 'radius'],
+        'additionalProperties': False,
+    })
+
+    polylineShapeSchema = extendSchema(baseShapeSchema, {
+        'properties': {
+            'type': {
+                'type': 'string',
+                'enum': ['polyline'],
+            },
+            'points': {
+                'type': 'array',
+                'items': coordSchema,
+                'minItems': 2,
+            },
+            'fillColor': colorSchema,
+            'closed': {
+                'type': 'boolean',
+                'description': 'polyline is open if closed flag is '
+                               'not specified',
+            },
+            'holes': {
+                'type': 'array',
+                'description':
+                    'If closed is true, this is a list of polylines that are '
+                    'treated as holes in the base polygon. These should not '
+                    'cross each other and should be contained within the base '
+                    'polygon.',
+                'items': {
+                    'type': 'array',
+                    'items': coordSchema,
+                    'minItems': 3,
+                },
+            },
+        },
+        'required': ['type', 'points'],
+        'additionalProperties': False,
+    })
+
+
+def validate_annotation(annotation_dict):
+    """Validate an annotation dict, then each element; raises jsonschema.ValidationError."""
+    jsonschema.Draft6Validator(AnnotationSchema.annotationSchema).validate(annotation_dict)
+    validatorElement = jsonschema.Draft6Validator(AnnotationSchema.baseElementSchema)
+    # 'elements' is not required by annotationSchema, so a valid dict may omit it.
+    for element in tqdm(annotation_dict.get('elements', [])):
+        validatorElement.validate(element)
+
+def validate_json_file(json_dst):
+    """Load the JSON annotation file at ``json_dst`` and validate its content.
+
+    Raises ``json.JSONDecodeError`` if the file is not valid JSON, and whatever
+    ``validate_annotation`` raises if the content does not match the schema.
+    """
+    # JSON is UTF-8 by specification; do not depend on the locale encoding.
+    with open(json_dst, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    validate_annotation(data)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Validate a json annotation file')
+    parser.add_argument('--input', default=os.path.join("out", "superpixel.anot"), type=str,
+                        help='Name of input json file with a pixelmap annotation')
+    args = parser.parse_args()
+    # Send log records to stdout instead of the logging default (stderr).
+    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
+    if not os.path.exists(args.input):
+        logging.error("Annotation path %s does not exist", args.input)
+        sys.exit(1)
+
+    validate_json_file(args.input)
+    logging.info("Done validating annotation ['%s']", args.input)

From 3cde3a69d475fe4486363a1a10ea45c532b2bffa Mon Sep 17 00:00:00 2001
From: Anders Sildnes <andsild@posteo.net>
Date: Wed, 28 May 2025 10:16:40 -0500
Subject: [PATCH 4/5] Make girder client a parameter

Easier for tests
---
 .../SuperpixelClassificationBase.py              | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
index 1d86403..99fd569 100644
--- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
+++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py
@@ -886,19 +886,21 @@ def predictLabels(self, gc, folderId, annotationName, itemsAndAnnot, features, m
                     radius, magnification, certainty, batchSize, prog)
             prog.progress(1)
 
-    def main(self, args):
+    def main(self, args, gc = None):
         self.feature_is_image = args.feature != 'vector'
         self.certainty = args.certainty
 
         print('\n>> CLI Parameters ...\n')
         pprint.pprint(vars(args))
 
-        gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
-        gc.token = args.girderToken
-        gc.authenticate('admin', 'password')
-        # dummy check to make sure we have access to server
-        if not [x for x in list(gc.listCollection()) if x['name'] == 'Active Learning']:
-            raise Exception("Unable to authenticate with girder")
+        if gc is None:
+            gc = girder_client.GirderClient(apiUrl=args.girderApiUrl)
+            gc.token = args.girderToken
+            gc.authenticate('admin', 'password')
+
+            # check to make sure we have access to server
+            if not [x for x in list(gc.listCollection()) if x['name'] == 'Active Learning']:
+                raise Exception("Unable to authenticate with girder")
 
         with ProgressHelper(
                 'Superpixel Classification', 'Superpixel classification', args.progress) as prog:

From 9111264e8b7e0ed990dd0a7bc14874bac402bde6 Mon Sep 17 00:00:00 2001
From: Anders Sildnes <andsild@posteo.net>
Date: Tue, 3 Jun 2025 09:10:53 -0500
Subject: [PATCH 5/5] Add simple script to inspect feature files

---
 tools/inspect_image_feature_file.py | 37 +++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 tools/inspect_image_feature_file.py

diff --git a/tools/inspect_image_feature_file.py b/tools/inspect_image_feature_file.py
new file mode 100644
index 0000000..a93d911
--- /dev/null
+++ b/tools/inspect_image_feature_file.py
@@ -0,0 +1,37 @@
+'''
+This script will open a feature file (.h5) and show up to the first nine images in a 3x3 grid.
+This tool is useful if you suspect that features are not extracted properly, for example due to erroneous mask values/indexing.
+
+Usage: python tools/inspect_image_feature_file.py [feature_file]  (default: features.h5)
+'''
+
+import h5py
+import matplotlib.pyplot as plt
+import numpy as np
+import sys
+
+# sys.argv[0] is always the script name, so a path was supplied only when
+# there are at least two entries.
+if len(sys.argv) > 1:
+    feature_file = sys.argv[1]
+else:
+    feature_file = "features.h5"
+
+with h5py.File(feature_file, "r") as f:
+    # Slicing the dataset reads at most nine images into memory; a file with
+    # fewer images simply yields a shorter array.
+    images = f["images"][:9]
+
+if len(images) == 0:
+    sys.exit(f"No images found in {feature_file}")
+
+for i, image in enumerate(images):
+    plt.subplot(3, 3, i + 1)
+    # axis('off') only affects the current axes, so it is set per subplot
+    plt.axis('off')
+    plt.imshow(image)
+    print(f"Image {i+1} is {image.shape}")
+
+# show() normally blocks until the window is closed, so the shapes are
+# printed before it is called.
+plt.show()