From ff7f8c1a61c0005d6f444cc660e35a3fc38da1b7 Mon Sep 17 00:00:00 2001 From: Anders Sildnes Date: Tue, 25 Feb 2025 10:43:45 -0600 Subject: [PATCH 1/5] Support `cutoff` predictions for AL This is to speed up the AL loop. Not a perfect solution: the UI will now recommend that users predict "default" for a lot of the labels. But it is a first step to make sure we can handle large slides with millions of annotations. --- .../SuperpixelClassification.xml | 7 + .../SuperpixelClassificationBase.py | 123 +++++++++++++----- .../SuperpixelClassificationTorch.py | 8 +- 3 files changed, 103 insertions(+), 35 deletions(-) diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml b/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml index 38c7b77..4ffc03d 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassification.xml @@ -198,5 +198,12 @@ 4 The number of worker threads for superpixel and feature generation + + cutoff + cutoff + + 500 + Number of unannotated superpixels to use per slide for features, training and predictions + diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py index adc1148..d7aa4d7 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py @@ -332,7 +332,7 @@ def createFeatureListFromPatchAndMaskList(self, patch_list, mask_list, maskvals_ ) return feature_list - def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patchSize, prog): + def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patchSize, prog, cutoff): import large_image print('Create feature', fileName) @@ -349,17 +349,31 @@ def
createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch gc.downloadFile(maskItem['largeImage']['fileId'], maskPath) tsMask = large_image.open(maskPath) + num_values = len(elem['values']) + labeled_samples = set([i for i, x in enumerate(elem['values']) if x > 0]) + unlabeled_samples = [i for i, x in enumerate(elem['values']) if x == 0] + if num_values - len(labeled_samples) > cutoff: + # only select a subset of unlabeled samples, i.e., prune the feature list + random.shuffle(unlabeled_samples) + unlabeled_samples = unlabeled_samples[:cutoff] + indices = list(sorted(list(labeled_samples) + unlabeled_samples)) + with h5py.File(filePath, 'w') as fptr: batch_size = 1024 # TODO: Is this the best value? - for batch_start in range(0, len(elem['values']), batch_size): - batch_list = elem['values'][batch_start: batch_start + batch_size] + total_size = len(indices) + for batch_start in range(0, total_size, batch_size): + #batch_list = elem['values'][batch_start: batch_start + batch_size] + batch_list = indices[batch_start: batch_start + batch_size] patch_list = [] mask_list = [] maskvals_list = [] - for idx, _ in enumerate(batch_list, start=batch_start): - prog.item_progress(item, 0.9 * idx / len(elem['values'])) - bbox = elem['user']['bbox'][idx * 4: idx * 4 + 4] + + for idx, i in enumerate(batch_list, start=batch_start): + prog.item_progress(item, 0.9 * idx / total_size) + bbox = elem['user']['bbox'][i * 4: i * 4 + 4] # use masked superpixel + if len(bbox) < 4: + pass patch = ts.getRegion( region=dict( left=int(bbox[0]), top=int(bbox[1]), @@ -409,6 +423,8 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch (time.time() - starttime)), item['name']) del batch_list, patch_list, mask_list, maskvals_list, feature_list + used_indices_ds = fptr.create_dataset( + 'used_indices', data=np.array(indices), dtype='i') print(ds.shape, len(elem['values']), '%5.3f' % (time.time() - starttime), item['name']) prog.item_progress(item, 0.9) 
@@ -418,9 +434,9 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch prog.item_progress(item, 1) return file - def createFeatures(self, gc, folderId, annotationName, featureFolderId, patchSize, numWorkers, - prog): - itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) + def createFeatures(self, gc, folderId, annotationName, itemsAndAnnot, featureFolderId, patchSize, numWorkers, + prog, cutoff): + # itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) prog.message('Creating features') prog.progress(0) prog.items([item for item, _, _ in itemsAndAnnot]) @@ -449,7 +465,7 @@ def createFeatures(self, gc, folderId, annotationName, featureFolderId, patchSiz else: futures.append((item, executor.submit( self.createFeaturesForItem, gc, item, elem, featureFolderId, - '%s.feature.h5' % (item['name']), patchSize, prog))) + '%s.feature.h5' % (item['name']), patchSize, prog, cutoff))) for item, future in futures: file = future.result() try: @@ -471,8 +487,13 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature, gc.downloadFile(feature['_id'], featurePath) with h5py.File(featurePath, 'r') as ffptr: fds = ffptr['images'] - for idx, labelnum in enumerate(elem['values']): - if labelnum and labelnum < len(elem['categories']): + if 'used_indices' in ffptr: + indices = ffptr['used_indices'] + else: + indices = range(len(elem['values'])) + for i,idx in enumerate(indices): + labelnum = elem['values'][idx] + if 0 < labelnum < len(elem['categories']): labelname = elem['categories'][labelnum]['label'] if labelname in excludeLabelList: continue @@ -483,7 +504,7 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature, labelname = labelList[labelnum - 1] else: continue - patch = fds[idx] + patch = fds[i] if not record['ds']: record['ds'] = record['fptr'].create_dataset( 'images', (1,) + patch.shape, maxshape=(None,) + patch.shape, @@ -503,10 +524,9 @@ def trainModelAddItem(self, gc, 
record, item, annotrec, elem, feature, print(record['ds'].shape, record['counts'], '%5.3f' % (time.time() - record['starttime'])) - def trainModel(self, gc, folderId, annotationName, features, modelFolderId, + def trainModel(self, gc, annotationName, itemsAndAnnot, features, modelFolderId, batchSize, epochs, trainingSplit, randomInput, labelList, excludeLabelList, prog): - itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) with tempfile.TemporaryDirectory(dir=os.getcwd()) as tempdir: trainingPath = os.path.join(tempdir, 'training.h5') with h5py.File(trainingPath, 'w') as fptr: @@ -534,7 +554,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId, prog.progress(1) if not record['ds']: print('No labeled data') - return + return None, None record['labelds'] = fptr.create_dataset( 'labels', (len(record['labelvals']),), dtype=int) record['labelds'] = np.array(record['labelvals'], dtype=int) @@ -566,7 +586,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId, print('Saved modTraining') return modelFile, modTrainingFile - def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, model, item, + def predictLabelsForItem(self, gc, annotationName, tempdir, model, item, annotrec, elem, feature, curEpoch, userId, labels, groups, makeHeatmaps, radius, magnification, certainty, batchSize, prog): import al_bench.factory @@ -579,6 +599,8 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, # Figure out which samples are already labeled labeled_samples: NDArray[np.int_] = np.nonzero(np.array(elem['values'])) + number_annotations = len(elem['values']) + tiny = np.finfo(np.float32).tiny print(f'{labeled_samples = }') print(f'certainty_type = {certainty!r}') @@ -589,9 +611,17 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, # In case we are computing batchbald compCertainty.set_batchbald_num_samples(16) 
compCertainty.set_batchbald_batch_size(100) - compCertainty.set_batchbald_excluded_samples(labeled_samples) + #compCertainty.set_batchbald_excluded_samples(labeled_samples) with h5py.File(featurePath, 'r') as ffptr: + if 'used_indices' in ffptr: + used_indices = set(list(ffptr['used_indices'])) + else: + used_indices = set(range(number_annotations)) + all_indices = set(range(number_annotations)) + unused_indices = list(sorted(all_indices.difference(used_indices))) + compCertainty.set_batchbald_excluded_samples(np.array(unused_indices)) + prog.item_progress(item, 0) # Create predicted annotation annot = copy.deepcopy(annotrec) @@ -600,21 +630,29 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, annot['elements'][0]['categories'] = [groups[key] for key in labels] ds = ffptr['images'] prog.item_progress(item, 0.05) - catWeights, predictions = self.predictLabelsForItemDetails( - batchSize, ds, item, model, prog) - catWeights = np.array(catWeights) - predictions = np.array(predictions) + _catWeights, _predictions, indices = self.predictLabelsForItemDetails( + batchSize, ds, np.array(list(used_indices), dtype=np.int64), item, model, use_cuda, prog) + # expand catWeights and predictions to be length of elem['values'] instead of just `cutoff` samples + # then copy in results from predictions + catWeights = np.zeros((number_annotations,) + _catWeights.shape[1:], dtype=np.float32 if str(_catWeights.dtype).endswith("32") else np.float64) + predictions = np.zeros((number_annotations,) + _predictions.shape[1:], dtype=np.float32 if str(_predictions.dtype).endswith("32") else np.float64) + for cw,p,idx in zip(_catWeights, _predictions, indices): + catWeights[idx] = cw + predictions[idx] = p + print_fully('predictions', predictions) prog.item_progress(item, 0.7) # compCertainty needs catWeights to have shape (num_superpixels, # bayesian_samples, num_classes) if 'batchbald' is selected, otherwise the # shape should be (num_superpixels, num_classes). 
- print_fully('catWeights', catWeights) # Ask compCertainty to compute certainties - cert = compCertainty.from_numpy_array(catWeights) + cert = compCertainty.from_numpy_array(catWeights + tiny) + print_fully('catWeights', catWeights) + # After the call to compCertainty, those numbers that end up as values for # annot's keys 'values', 'confidence', 'categoryConfidence', and 'certainty' # should have shape (num_superpixels, num_classes). + print_fully('cert', cert) scores = cert[certainty]['scores'] print_fully('scores', scores) @@ -625,14 +663,26 @@ def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, epsilon = 1e-50 predictions = np.log(catWeights + epsilon) cats = np.argmax(catWeights, axis=-1) - indices = np.arange(cats.shape[0]) - conf = catWeights[indices, cats[indices]] + conf = catWeights[list(all_indices), cats[np.arange(cats.shape[0])]] print_fully('cats', cats) print_fully('conf', conf) + # give unused_indices the highest possible confidence so that they show up last in the active learning UI + # (because it sorts by confidence in descending order) + scores[unused_indices] = np.finfo(scores.dtype).max + # additionally, ensure that labels that are already labeled also end up last or late in the recommendations + # for the DSA UI, this prevents labeled samples from being shown again to the user + scores[labeled_samples] = np.finfo(scores.dtype).max + + # additionally, ensure that labels that are already labeled also end up last or late in the recommendations + # for the DSA UI, this prevents labeled samples from being shown again to the user + scores[labeled_samples] = np.finfo(scores.dtype).max + cats = cats.tolist() conf = conf.tolist() - # Should this be from predictions for from catWeights?!!! + + # Should this be from predictions or from catWeights?!!! 
+ predictions[np.isneginf(predictions)] = np.finfo(predictions.dtype).min catConf = predictions.tolist() scores = scores.tolist() annot['elements'][0]['values'] = cats @@ -769,10 +819,10 @@ def makeHeatmapsForItem(self, gc, annotationName, userId, tempdir, radius, item, 'fileId': item['largeImage']['fileId'], 'userId': userId})) - def predictLabels(self, gc, folderId, annotationName, features, modelFolderId, + def predictLabels(self, gc, folderId, annotationName, itemsAndAnnot, features, modelFolderId, annotationFolderId, saliencyMaps, radius, magnification, certainty, batchSize, prog): - itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) + #itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName) curEpoch = self.getCurrentEpoch(itemsAndAnnot) folder = gc.getFolder(folderId) userId = folder['creatorId'] @@ -845,6 +895,10 @@ def main(self, args): gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) gc.token = args.girderToken + gc.authenticate('admin', 'password') + # dummy check to make sure we have access to server + if not [x for x in list(gc.listCollection()) if x['name'] == 'Active Learning']: + raise Exception("Unable to authenticate with girder") with ProgressHelper( 'Superpixel Classification', 'Superpixel classification', args.progress) as prog: @@ -853,16 +907,19 @@ def main(self, args): gc, args.images, args.annotationName, args.radius, args.magnification, args.annotationDir, args.numWorkers, prog) + itemsAndAnnot = self.getItemsAndAnnotations(gc, args.images, args.annotationName) features = self.createFeatures( - gc, args.images, args.annotationName, args.features, args.patchSize, - args.numWorkers, prog) + gc, args.images, args.annotationName, itemsAndAnnot, args.features, args.patchSize, + args.numWorkers, prog, args.cutoff) if args.train: + print("Training...") self.trainModel( - gc, args.images, args.annotationName, features, args.modeldir, args.batchSize, + gc, args.images, args.annotationName, 
itemsAndAnnot, features, args.modeldir, args.batchSize, args.epochs, args.split, args.randominput, args.labels, args.exclude, prog) + print("Predicting labels...") self.predictLabels( - gc, args.images, args.annotationName, features, args.modeldir, args.annotationDir, + gc, args.images, args.annotationName, itemsAndAnnot, features, args.modeldir, args.annotationDir, args.heatmaps, args.radius, args.magnification, args.certainty, args.batchSize, prog) diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py index e06d247..243c520 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationTorch.py @@ -532,12 +532,13 @@ def predictLabelsForItemDetails( for cb in callbacks: cb.on_predict_begin(logs=logs) + # ds also needs to have information about the indices so that we can shuffle the data but still link it to an index ds: torch.utils.data.TensorDataset = torch.utils.data.TensorDataset( ( torch.from_numpy(np.array(ds_h5).transpose((0, 3, 2, 1))) if self.feature_is_image else torch.from_numpy(np.array(ds_h5)) - ), + ), torch.from_numpy(indices), ) if batchSize < 1: batchSize = self.findOptimalBatchSize(model, ds, training=False) @@ -545,6 +546,7 @@ def predictLabelsForItemDetails( dl: torch.utils.data.DataLoader = torch.utils.data.DataLoader(ds, batch_size=batchSize) predictions: NDArray[np.float_] = np.zeros((num_superpixels, bayesian_samples, num_classes)) catWeights: NDArray[np.float_] = np.zeros((num_superpixels, bayesian_samples, num_classes)) + outIndices: NDArray[np.int64] = np.zeros(num_superpixels, dtype=np.int64) with torch.no_grad(): model.eval() # Tell torch that we will be doing predictions row: int = 0 @@ -567,6 +569,8 @@ def predictLabelsForItemDetails( catWeights_raw = torch.nn.functional.softmax(predictions_raw, 
dim=-1) predictions[row:new_row, :, :] = predictions_raw.detach().cpu().numpy() catWeights[row:new_row, :, :] = catWeights_raw.detach().cpu().numpy() + outIndices[row:new_row] = data[1].detach().cpu().numpy().astype(np.int64)[:] + row = new_row for cb in callbacks: cb.on_predict_batch_end(i) @@ -574,7 +578,7 @@ def predictLabelsForItemDetails( cb.on_predict_end({'outputs': predictions}) prog.item_progress(item, 0.4) # scale to units - return catWeights, predictions + return catWeights, predictions, outIndices def findOptimalBatchSize( self, model: torch.nn.Module, ds: torch.utils.data.TensorDataset, training: bool, From 207067ad1e6fac3075f68a4110cb4775039dfdd2 Mon Sep 17 00:00:00 2001 From: Anders Sildnes Date: Wed, 21 May 2025 18:14:34 -0500 Subject: [PATCH 2/5] Bugfix: use global index, not batch index, for bounding boxes This may not have been a bug before, but now when indices may not be in order (since we are using `cutoff`), it becomes relevant --- .../SuperpixelClassification/SuperpixelClassificationBase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py index d7aa4d7..1d86403 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py @@ -398,7 +398,7 @@ def createFeaturesForItem(self, gc, item, elem, featureFolderId, fileName, patch if mask.shape[2] == 4: mask = mask[:, :, :-1] maskvals = [[val % 256, val // 256 % 256, val // 65536 % 256] - for val in [idx * 2, idx * 2 + 1]] + for val in [(i + 1) * 2, (i + 1) * 2 + 1]] patch_list.append(patch) mask_list.append(mask) maskvals_list.append(maskvals) From 68293c3d36640abdc5f7c7b3207ee6dcacf4b228 Mon Sep 17 00:00:00 2001 From: Anders Sildnes Date: Mon, 12 May 2025 13:25:13 -0500 Subject: [PATCH 3/5] Add 
simple tests for features, training, pred --- .../tests/generate_MNIST_image.py | 159 +++++ .../tests/test_feature_extract.py | 218 +++++++ .../tests/test_full_training_cycle.py | 524 ++++++++++++++++ .../tests/test_gen_superpixels.py | 164 +++++ .../tests/test_predict.py | 254 ++++++++ .../tests/test_tensorflow.py | 93 +++ .../tests/test_torch.py | 94 +++ .../tests/validate_json_annotation.py | 588 ++++++++++++++++++ 8 files changed, 2094 insertions(+) create mode 100644 superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_predict.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/test_torch.py create mode 100644 superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py diff --git a/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py b/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py new file mode 100644 index 0000000..9d7e121 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/generate_MNIST_image.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python +''' +Generate a .tiff with numbers from MNIST +''' + +import os +import argparse +import random + +import numpy as np +import pandas as pd +import tifffile +from PIL import Image +from torchvision.datasets import MNIST + +def parse_args(): + # Parse arguments + parser = argparse.ArgumentParser(description="Generate a pyramidal MNIST image.") + parser.add_argument('--root_dataset_path', 
type=str, default="/data/aza4423_anders/mnist", help='Path to download and store MNIST dataset') + #parser.add_argument('--num_images', type=int, default=244 * 244, help='Number of random MNIST images to use') + parser.add_argument('--num_images', type=int, default=4, help='Number of random MNIST images to use') + parser.add_argument('--output_path', type=str, default="/data/aza4423_anders/aml-dsa/mnist_pyramid.tif", help='Output path for the pyramidal TIF file') + parser.add_argument('--test', default=False, type=bool, action=argparse.BooleanOptionalAction, + metavar='T', + help='whether to use test MNIST or train' + ) + + args = parser.parse_args() + + return args + +def d_to_rgb(d): + r = d & 0xFF + g = (d >> 8) & 0xFF + b = (d >> 16) & 0xFF + return [r, g, b] + + +def create_mnist_image(root_dataset_path=".", num_images=100, output_path="./out", test=False, start_value=0): + # verify that num_images has a square root; otherwise we'd have to insert blank tiles for the uneven grid + assert num_images % np.sqrt(num_images) == 0 + + # Download MNIST (if not already downloaded) + dataset = MNIST(root=root_dataset_path, train=not test, download=True) + + # Select N random MNIST images (each image is PIL.Image in mode "L") + # (Make the number square-rootable) + num_images = num_images # Number of images from argument + # oversample if we want more images than the length of MNIST + if num_images > len(dataset): + indices = random.choices(range(len(dataset)), k=num_images) + else: + indices = list(range(num_images)) + random.shuffle(indices) + + #indices = random.sample(range(len(dataset)), num_images) + mnist_images = [np.array(dataset[i][0]) for i in indices] # each is 28x28, uint8 + mnist_labels = [np.array(dataset[i][1]) for i in indices] + + # Arrange the images in a grid (so num_images should be a number with an integer root) + tile_rows, tile_cols = int(np.sqrt(num_images)), int(np.sqrt(num_images)) + tile_h, tile_w = mnist_images[0].shape # typically 28x28 + 
grid_h, grid_w = tile_rows * tile_h, tile_cols * tile_w + base_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8) + pm_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8) + + for idx, img in enumerate(mnist_images): + r = idx // tile_cols + c = idx % tile_cols + # convert img to RGB + rgb_img = np.stack([img, img, img], axis=-1) + base_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = rgb_img + + value_img = np.zeros((tile_h, tile_w, 3), dtype=np.uint8) + i = (idx + 1) * 2 + rgb = d_to_rgb(i + start_value) + value_img[1:-1, 1:-1] = rgb + rgb = d_to_rgb(i + start_value + 1) + value_img[0, :] = rgb + value_img[-1, :] = rgb + value_img[:, 0] = rgb + value_img[:, -1] = rgb + + pm_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = value_img + + + # Note: We assume that the base level corresponds to 40x magnification. + # Now, build a pyramid (list of downsampled images). + pyramid_pm = [pm_image] + pm_current = pm_image.copy() + + pyramid = [base_image] + current = base_image.copy() + # Continue downsampling by a factor of 2 until one dimension becomes very small. + while min(current.shape) >= 64: + # Use Pillow to resize (ANTIALIAS gives good quality downsampling) + im = Image.fromarray(current) + new_w, new_h = current.shape[1] // 2, current.shape[0] // 2 + if new_w < 1 or new_h < 1: + break + im_resized = im.resize((new_w, new_h)) + current = np.array(im_resized) + pyramid.append(current) + + im = Image.fromarray(pm_image) + new_w, new_h = pm_current.shape[1] // 2, pm_current.shape[0] // 2 + if new_w < 1 or new_h < 1: + break + im_resized = im.resize((new_w, new_h)) + pm_current = np.array(im_resized) + pyramid_pm.append(current) + + # Save the image as a pyramidal TIFF. + # The base image is the main image and the pyramid list (excluding the base) is saved as subIFDs. 
+ output_filename = output_path # Use the output path from argument + if os.path.dirname(output_filename): + os.makedirs(os.path.dirname(output_filename), exist_ok=True) + if os.path.exists(output_filename): + os.remove(output_filename) + + with tifffile.TiffWriter(output_filename, bigtiff=False) as tif: + tif.write(pyramid[0], + tile=(tile_w * 4, tile_h * 4), + photometric='RGB', + description='Whole-slide MNIST image at 40x magnification', + subifds=pyramid[1:]) + print(f"Pyramidal TIFF saved as {output_filename}") + + output_filename_pm = output_filename + ".pixelmap.tiff" # Use the output path from argument + if os.path.dirname(output_filename_pm): + os.makedirs(os.path.dirname(output_filename_pm), exist_ok=True) + if os.path.exists(output_filename_pm): + os.remove(output_filename_pm) + with tifffile.TiffWriter(output_filename_pm, bigtiff=False) as tif: + tif.write(pyramid_pm[0], + tile=(tile_w * 4, tile_h * 4), + photometric='RGB', + description='Pixelmap for Whole-slide MNIST image at 40x magnification', + subifds=pyramid_pm[1:]) + print(f"Pyramidal TIFF saved as {output_filename_pm}") + + # generate a corresponding CSV "cells" file + # with headers "x,y,w,h" for each image + csv_filename = output_filename + "_cells.csv" + with open(csv_filename, 'w') as f: + f.write("x,y,w,h,value\n") + i = 0 + for r in range(tile_rows): + for c in range(tile_cols): + x, y = c * tile_w, r * tile_h + f.write(f"{x},{y},{tile_w},{tile_h},{mnist_labels[i]}\n") + i += 1 + df = pd.read_csv(csv_filename, header=0) + print(f"Annotation file saved as {csv_filename}") + return output_filename, output_filename_pm, df + +if __name__ == "__main__": + _args = parse_args() + create_mnist_image(_args.root_dataset_path, _args.num_images, _args.output_path, _args.test) diff --git a/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py b/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py new file mode 100644 index 0000000..2c17864 --- 
/dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_feature_extract.py @@ -0,0 +1,218 @@ +import os +import shutil +import sys +import tempfile +from unittest.mock import MagicMock + +import h5py +import large_image +import numpy as np +import pytest + +# make pythonpath work out of the box - although your editor may complain +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from progress_helper import ProgressHelper +from tests.generate_MNIST_image import create_mnist_image + +from xdg_base_dirs import ( xdg_cache_home, ) + +NUM_IMAGES = 64 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + tiff_path = os.path.join(tmpdirname, "test_mnist.tiff") + #tiff_path_pm = os.path.join(tmpdirname, "test_mnist.tiff.pixelmap.tiff") + + tiff_path, tiff_path_pm, labels = create_mnist_image( + root_dataset_path=xdg_cache_home(), + num_images=NUM_IMAGES, + output_path=tiff_path, + test=False, + ) + # 0 is background + labels['value'] = labels['value'] + 1 + + # we use yield so that the temporarydirectory is still open in the tests + yield tiff_path, tiff_path_pm, NUM_IMAGES, labels + +MNIST_IMAGE_SIZE=28 +COLOR_DIM = 3 + +def test_cutoff(create_sample_data): + global MNIST_IMAGE_SIZE, COLOR_DIM + test_image_pth, test_image_pth_pm, num_images, labels = create_sample_data + base = SuperpixelClassificationBase() + + # Create test data + item = { + 'name': test_image_pth, + 'largeImage': {'fileId': 'test_image_id'} + } + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + if "pixelmap" in dst: + if not os.path.exists(dst): + return shutil.copy(test_image_pth_pm, dst) + else: + if not os.path.exists(dst): + return shutil.copy(test_image_pth, dst) + return None + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.getItem = 
MagicMock(return_value={'name': test_image_pth_pm, 'largeImage': {'fileId': 'foobar'}}) + def mv_to_src(_, src): + dst = os.path.dirname(test_image_pth) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'}) + #gc.uploadFileToFolder = MagicMock(return_value={'_id': 'test_file_id'}) + + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()] + + elem = { + 'girderId': 'test_girder_id', + 'values': + [] \ + + list(labels['value'])[:-2] + + [0, 0], # last two images unlabeled + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + + filename = 'test_features.h5' + h5_file = os.path.join(os.path.dirname(test_image_pth), filename) + if os.path.exists(h5_file): + os.remove(h5_file) + + assert not os.path.exists(h5_file) + + cutoff = 1 + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items([item]) + result = base.createFeaturesForItem( + gc=gc, + item=item, + elem=elem, + featureFolderId='test_folder_id', + fileName=filename, + patchSize=MNIST_IMAGE_SIZE, + prog=prog, + cutoff=cutoff, + ) + + assert os.path.exists(h5_file), f"Output file {h5_file} does not exist" + with h5py.File(h5_file, 'r') as ffptr: + assert 'images' in ffptr + assert ffptr['images'].shape == (NUM_IMAGES - cutoff, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM) + assert len(ffptr['used_indices']) == NUM_IMAGES - cutoff # number of labeled - cutoff + +def test_create_features_for_item(create_sample_data): + global MNIST_IMAGE_SIZE, COLOR_DIM + test_image_pth, test_image_pth_pm, num_images, labels = create_sample_data + base = SuperpixelClassificationBase() + + # Create test data + item = { + 'name': test_image_pth, + 'largeImage': {'fileId': 'test_image_id'} + } + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + if "pixelmap" in dst: + if not 
os.path.exists(dst): + return shutil.copy(test_image_pth_pm, dst) + else: + if not os.path.exists(dst): + return shutil.copy(test_image_pth, dst) + return None + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.getItem = MagicMock(return_value={'name': test_image_pth_pm, 'largeImage': {'fileId': 'foobar'}}) + def mv_to_src(_, src): + dst = os.path.dirname(test_image_pth) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'}) + #gc.uploadFileToFolder = MagicMock(return_value={'_id': 'test_file_id'}) + + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()] + + elem = { + 'girderId': 'test_girder_id', + 'values': + [] \ + + list(labels['value'])[:-2] + + [0, 0], # last two images unlabeled + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + + filename = 'test_features.h5' + h5_file = os.path.join(os.path.dirname(test_image_pth), filename) + if os.path.exists(h5_file): + os.remove(h5_file) + + assert not os.path.exists(h5_file) + + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items([item]) + result = base.createFeaturesForItem( + gc=gc, + item=item, + elem=elem, + featureFolderId='test_folder_id', + fileName=filename, + patchSize=MNIST_IMAGE_SIZE, + prog=prog, + cutoff=9999 + ) + + assert os.path.exists(h5_file), f"Output file {h5_file} does not exist" + with h5py.File(h5_file, 'r') as ffptr: + assert 'images' in ffptr + assert ffptr['images'].shape == (num_images, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM) + feature_img = ffptr['images'][0] + # open test_image_pth using coordinates [x,y,w,h] from elem['user']['bbox'][:4] and make sure it's pixel-equal with first_img + x, y, x2, y2 = elem['user']['bbox'][:4] + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, 
right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + orig_image = orig_image.astype(feature_img.dtype) + print(orig_image.dtype) + np.testing.assert_array_equal(orig_image, feature_img) + + # also check that the last image matches + feature_img = ffptr['images'][-1] + x, y, x2, y2 = elem['user']['bbox'][-4:] + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + orig_image = orig_image.astype(feature_img.dtype) + print(orig_image.dtype) + np.testing.assert_array_equal(orig_image, feature_img) + + assert 'used_indices' in ffptr + assert len(ffptr['used_indices']) == num_images + + # Assertions + assert result == h5_file + assert gc.downloadFile.call_count == 2 # Called for both image and mask + assert gc.getItem.call_count == 1 + assert gc.uploadFileToFolder.call_count == 1 diff --git a/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py b/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py new file mode 100644 index 0000000..03c6b8a --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_full_training_cycle.py @@ -0,0 +1,524 @@ +''' +This file contains tests for a full training cycle: extracting superpixels, training and evaluation. +The "cycle" is: + 1. generate NUM_WSIS different whole slide images using numbers from MNIST. + 2. extract features from said images. + 3. train a model on the features. + 4. evaluate the model on the features. +We expect an accuracy of at least 90%. + +This test is to verify that the training cycle works as expected. +Since there is batching involved, we want to use a larger number of samples instead of just a quick mini-test, as found in the other files. 
+''' +import argparse +import glob +import json +import os +import re +import shutil +import sys +import tempfile +from unittest.mock import MagicMock + +import numpy as np +import pytest +from xdg_base_dirs import (xdg_cache_home, ) + +# make pythonpath work out of the box - although your editor may complain +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from SuperpixelClassificationTensorflow import SuperpixelClassificationTensorflow +from SuperpixelClassificationTorch import SuperpixelClassificationTorch +from tests.generate_MNIST_image import create_mnist_image + +NUM_WSIS = 2 +MNIST_IMAGE_SIZE = 28 +NUM_IMAGES_PER_WSI = 10 ** 2 +COLOR_DIM = 3 +PATCH_SIZE = 100 # only size compatible with pytorch model for the time being (since there are hardcoded sizes in the definition of the model) +NUM_EPOCHS = 5 + +@pytest.fixture(scope="function") +def create_sample_data(request): + global NUM_WSIS, NUM_IMAGES_PER_WSI + wsi_paths, pm_paths, list_labels = [], [], [] + with tempfile.TemporaryDirectory() as tmpdirname: + for i in range(NUM_WSIS): + tiff_path = os.path.join(tmpdirname, f"test_mnist_{i}.tiff") + + tiff_path, tiff_path_pm, labels = create_mnist_image( + root_dataset_path=xdg_cache_home(), + num_images=NUM_IMAGES_PER_WSI, + output_path=tiff_path, + test=False, + start_value = request.param + ) + # where labels['value'] == 0, put 10 instead, since 0 will be reserved for unlabeled + labels.loc[labels['value'] == 0, 'value'] = 10 + + wsi_paths.append(tiff_path) + pm_paths.append(tiff_path_pm) + list_labels.append(labels) + + # we use yield so that the temporarydirectory is still open in the tests + yield wsi_paths, pm_paths, NUM_WSIS, list_labels + +@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~5-10 min), run only if you want to") +@pytest.mark.parametrize('create_sample_data', [0], indirect=True) +def 
test_main_pytorch(create_sample_data): + global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS + tiff_paths, tiff_path_pms, num_images, labels = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTorch() + + annotation_name = 'torchMNISTtest' + config = dict( + annotationDir = 'annotationdir', + annotationName = annotation_name, + batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time + certainty = 'batchbald', + cutoff = 600000, # plenty of space to allow all training samples + epochs = NUM_EPOCHS, + exclude = [], + feature = 'patch', + features = 'featuredir', + gensuperpixels = False, + girderApiUrl = 'http://localhost:8080/api/v1', + girderToken = '', + heatmaps = False, + images = 'imagedir', + labels = '', + magnification = 40.0, + modeldir = '', + numWorkers = 1, + patchSize = PATCH_SIZE, + radius = MNIST_IMAGE_SIZE, + randominput = False, + split = 0.7, + train = True, + useCuda = True, + progress = True, + ) + args = argparse.Namespace(**config) + + mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] + + items = [] + for i in range(NUM_WSIS): + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()] + elem = { + 'girderId': f'test_girder_id{i}', + 'categories': [ + {"label": c} for c in mnist_labels + ], + 'values': labels[i]['value'].tolist(), + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + item = { + '_id': f'test_file{i}', + 'name': os.path.basename(tiff_paths[i]), + 'largeImage': {'fileId': f'test_image_id{i}'}, + } + mask_item = { + '_id': f'test_file{i}', + 'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]), + 'largeImage': {'fileId': f'test_mask_id{i}'}, + } + annotrec = { + '_id': f'test_file{i}', + '_version': 0, + 'annotation': {'name': 'TorchTest'}, + } + items.append((item, annotrec, elem)) + + + gc = MagicMock() + 
base.getItemsAndAnnotations = MagicMock(return_value=items) + + with tempfile.TemporaryDirectory() as tmpdirname: + def mv_to_dst(req_pth : str, dst : str): + if req_pth.startswith("test_"): + for f in tiff_paths + tiff_path_pms: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"Copied {f} to {dst}") + elif req_pth.startswith("feature"): + feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5")) + for f in feature_files: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"Copied {f} to {dst}") + elif req_pth.endswith("model"): + model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.pth"))[0] + shutil.copy(model_file, dst) + elif "modtraining" in req_pth: + model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0] + shutil.copy(model_file, dst) + else: + print(f"Received unknown request path '{req_pth}'") + return {} + + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(req, src, reference=None): + shutil.copy(src, tmpdirname) + print(f"Copied {src} to {tmpdirname}") + # each WSI gets two separate .anot files. 
The below if statement gives them unique filenames so we can reference later + if src.endswith(".anot"): + # extract the number at the end of req, which can look like "testfile1" or "testfile1000" + m = re.search(r'(\d+)$', req) + num = int(m.group(1)) + s = os.path.basename(src).replace(".anot", f"_{num}.myanot") + shutil.copy(src, os.path.join(tmpdirname, s)) + print(f"Also copied {s} to {tmpdirname}") + return {'_id': 'feature', 'name': os.path.basename(src)} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + gc.getItem = MagicMock(return_value=mask_item) + + gc.listResource = MagicMock(return_value=[dict(name=f"{annotation_name}model", _id = 'model'), dict(name=f"{annotation_name}modtraining", _id = 'modtraining')]) + gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True) + gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id')) + + def list_file(req: str, limit: int = 0) -> iter: + if "modtraining" in req: + return iter([dict(name=req, _id = 'modtraining')]) + else: + return iter([dict(name=req, _id='model')]) + gc.listFile = MagicMock(side_effect=list_file) + + base.main(args, gc) + + for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))): + assert os.path.exists(file) + with open(file, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES_PER_WSI + + assert len(e['user']['bbox']) == NUM_IMAGES_PER_WSI * 4 # 4 is for x,y,w,h + + assert len(e['categories']) == len(mnist_labels) - 1 # -1 because we don't have a default category + assert len(e['user']['confidence']) == NUM_IMAGES_PER_WSI + + # compare e['values'] to labels['values'], to make sure we've trained a valid model + # the order of the values is shuffled in the annotation file, the ordering is in e['categories'] + file_num = int(file.split('Predictions_')[-1].split('.myanot')[0]) + predicted_labels = 
np.array([e['categories'][c]['label'] for c in e['values']]) + matches = (predicted_labels == np.array(list(map(str, labels[file_num]['value'])))) + similarity = matches.sum() / len(matches) + expected_min_accuracy = 0.75 + assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}" + print(f"Similarity between predicted values and GT is {similarity}") + +@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~1-10 min), run only if you want to") +@pytest.mark.parametrize('create_sample_data', [0], indirect=True) +def test_main_tf(create_sample_data): + global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS + tiff_paths, tiff_path_pms, num_images, labels = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow() + + annotation_name = 'tensorflowMNISTtest' + config = dict( + annotationDir = 'annotationdir', + annotationName = annotation_name, + batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time + certainty = 'confidence', + cutoff = 600000, # plenty of space to allow all training samples + epochs = NUM_EPOCHS, + exclude = [], + feature = 'patch', + features = 'featuredir', + gensuperpixels = False, + girderApiUrl = 'http://localhost:8080/api/v1', + girderToken = '', + heatmaps = False, + images = 'imagedir', + labels = '', + magnification = 40.0, + modeldir = 'modeldir', + numWorkers = 1, + patchSize = PATCH_SIZE, + radius = MNIST_IMAGE_SIZE, + randominput = False, + split = 0.7, + train = True, + useCuda = False, + progress = True, + ) + args = argparse.Namespace(**config) + + mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] + + items = [] + for i in range(NUM_WSIS): + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 'h']].iterrows()] + elem = { + 'girderId': f'test_girder_id{i}', + 'categories': [ + {"label": c} for 
c in mnist_labels + ], + 'values': labels[i]['value'].tolist(), + 'user': { + 'bbox': [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + item = { + '_id': f'test_file{i}', + 'name': os.path.basename(tiff_paths[i]), + 'largeImage': {'fileId': f'test_image_id{i}'}, + } + mask_item = { + '_id': f'test_file{i}', + 'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]), + 'largeImage': {'fileId': f'test_mask_id{i}'}, + } + annotrec = { + '_id': f'test_file{i}', + '_version': 0, + 'annotation': {'name': 'TorchTest'}, + } + items.append((item, annotrec, elem)) + + + gc = MagicMock() + base.getItemsAndAnnotations = MagicMock(return_value=items) + + with tempfile.TemporaryDirectory() as tmpdirname: + def mv_to_dst(req_pth : str, dst : str): + if req_pth.startswith("test_"): + for f in tiff_paths + tiff_path_pms: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.startswith("feature"): + feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5")) + for f in feature_files: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.endswith("model"): + model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.h5"))[0] + shutil.copy(model_file, dst) + elif "modtraining" in req_pth: + model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0] + shutil.copy(model_file, dst) + else: + raise RuntimeError(f"Received unknown request path '{req_pth}'") + return {} + + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(req, src, reference=None): + shutil.copy(src, tmpdirname) + print(f"MockUpload: Copied {src} to {tmpdirname}") + # each WSI gets two separate .anot 
files. The below if statement gives them unique filenames so we can reference later + if src.endswith(".anot"): + # extract the number at the end of req, which can look like "testfile1" or "testfile1000" + m = re.search(r'(\d+)$', req) + num = int(m.group(1)) + s = os.path.basename(src).replace(".anot", f"_{num}.myanot") + shutil.copy(src, os.path.join(tmpdirname, s)) + print(f"Also copied {s} to {tmpdirname}") + return {'_id': 'feature', 'name': os.path.basename(src)} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + gc.getItem = MagicMock(return_value=mask_item) + + modelName = f"{annotation_name} Model Epoch 0.h5" + modTrainingName = f"{annotation_name} ModTraining Epoch 0.h5" + gc.listResource = MagicMock(return_value=[dict(name=modelName, _id = 'model'), dict(name=modTrainingName, _id = 'modtraining')]) + gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True) + gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id')) + + def list_file(req: str, limit: int = 0) -> iter: + if "modtraining" in req: + return iter([dict(name=modTrainingName, _id = 'modtraining')]) + else: + return iter([dict(name=modelName, _id='model')]) + gc.listFile = MagicMock(side_effect=list_file) + + base.main(args, gc) + + for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))): + assert os.path.exists(file) + with open(file, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES_PER_WSI + + assert len(e['user']['bbox']) == NUM_IMAGES_PER_WSI * 4 # 4 is for x,y,w,h + + assert len(e['categories']) == len(mnist_labels) - 1 # exclude the default category + assert len(e['user']['confidence']) == NUM_IMAGES_PER_WSI + + # compare e['values'] to labels['values'], to make sure we've trained a valid model + # the order of the values is shuffled in the annotation file, the ordering is in e['categories'] + file_num = 
int(file.split('Predictions_')[-1].split('.myanot')[0]) + predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']]) + matches = (predicted_labels == np.array(list(map(str, labels[file_num]['value'])))) + similarity = matches.sum() / len(matches) + expected_min_accuracy = 0.75 + assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}" + print(f"Similarity between predicted values and GT is {similarity}") + +@pytest.mark.skipif("RUNALL" not in os.environ, reason="this is a slow test (~1-10 min), run only if you want to") +@pytest.mark.parametrize('create_sample_data', [2], indirect=True) +def test_main_tf_with_background(create_sample_data): + global NUM_WSIS, PATCH_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM, NUM_EPOCHS + tiff_paths, tiff_path_pms, num_images, labels = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTensorflow() + + annotation_name = 'tensorflowMNISTtest' + config = dict( + annotationDir = 'annotationdir', + annotationName = annotation_name, + batchSize = int(np.sqrt(NUM_IMAGES_PER_WSI)), # one row of the wsi at a time + certainty = 'confidence', + cutoff = 600000, # plenty of space to allow all training samples + epochs = NUM_EPOCHS, + exclude = [], + feature = 'patch', + features = 'featuredir', + gensuperpixels = False, + girderApiUrl = 'http://localhost:8080/api/v1', + girderToken = '', + heatmaps = False, + images = 'imagedir', + labels = '', + magnification = 40.0, + modeldir = 'modeldir', + numWorkers = 1, + patchSize = PATCH_SIZE, + radius = MNIST_IMAGE_SIZE, + randominput = False, + split = 0.7, + train = True, + useCuda = False, + progress = True, + ) + args = argparse.Namespace(**config) + + mnist_labels = ['default', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'] + + items = [] + for i in range(NUM_WSIS): + bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[i][['x', 'y', 'w', 
'h']].iterrows()] + elem = { + 'girderId': f'test_girder_id{i}', + 'categories': [ + {"label": c} for c in mnist_labels + ], + 'values': [0] + labels[i]['value'].tolist(), + 'user': { + 'bbox': [0,0,1,1] + [item for sublist in bboxes for item in sublist] + }, + 'transform': {'matrix': [[1.0]]} + } + item = { + '_id': f'test_file{i}', + 'name': os.path.basename(tiff_paths[i]), + 'largeImage': {'fileId': f'test_image_id{i}'}, + } + mask_item = { + '_id': f'test_file{i}', + 'name': '.tiff'.join(os.path.basename(tiff_path_pms[i]).split('.tiff')[:-1]), + 'largeImage': {'fileId': f'test_mask_id{i}'}, + } + annotrec = { + '_id': f'test_file{i}', + '_version': 0, + 'annotation': {'name': 'TorchTest'}, + } + items.append((item, annotrec, elem)) + + + gc = MagicMock() + base.getItemsAndAnnotations = MagicMock(return_value=items) + + with tempfile.TemporaryDirectory() as tmpdirname: + def mv_to_dst(req_pth : str, dst : str): + if req_pth.startswith("test_"): + for f in tiff_paths + tiff_path_pms: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.startswith("feature"): + feature_files = glob.glob(os.path.join(tmpdirname, "*feature.h5")) + for f in feature_files: + dpath = os.path.join(dst, os.path.basename(f)) + if not os.path.exists(dpath) and os.path.basename(f) == os.path.basename(dst): + shutil.copy(f, dst) + print(f"MockDownload: Copied {f} to {dst}") + elif req_pth.endswith("model"): + model_file = glob.glob(os.path.join(tmpdirname, f"*Model *{0}.h5"))[0] + shutil.copy(model_file, dst) + elif "modtraining" in req_pth: + model_file = glob.glob(os.path.join(tmpdirname, f"*ModTraining *{0}.h5"))[0] + shutil.copy(model_file, dst) + else: + raise RuntimeError(f"Received unknown request path '{req_pth}'") + return {} + + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(req, src, reference=None): + 
shutil.copy(src, tmpdirname) + print(f"MockUpload: Copied {src} to {tmpdirname}") + # each WSI gets two separate .anot files. The below if statement gives them unique filenames so we can reference later + if src.endswith(".anot"): + # extract the number at the end of req, which can look like "testfile1" or "testfile1000" + m = re.search(r'(\d+)$', req) + num = int(m.group(1)) + s = os.path.basename(src).replace(".anot", f"_{num}.myanot") + shutil.copy(src, os.path.join(tmpdirname, s)) + print(f"Also copied {s} to {tmpdirname}") + return {'_id': 'feature', 'name': os.path.basename(src)} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + gc.getItem = MagicMock(return_value=mask_item) + + modelName = f"{annotation_name} Model Epoch 0.h5" + modTrainingName = f"{annotation_name} ModTraining Epoch 0.h5" + gc.listResource = MagicMock(return_value=[dict(name=modelName, _id = 'model'), dict(name=modTrainingName, _id = 'modtraining')]) + gc.uploadFileToItem = MagicMock(side_effect=mv_to_src, return_value=True) + gc.getFolder = MagicMock(return_value=dict(name='test_folder', creatorId='creatorId', _id='test_folder_id')) + + def list_file(req: str, limit: int = 0) -> iter: + if "modtraining" in req: + return iter([dict(name=modTrainingName, _id = 'modtraining')]) + else: + return iter([dict(name=modelName, _id='model')]) + gc.listFile = MagicMock(side_effect=list_file) + + base.main(args, gc) + + for file in sorted(glob.glob(os.path.join(tmpdirname, f"*Predictions*.myanot"))): + assert os.path.exists(file) + with open(file, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES_PER_WSI + 1 + + assert len(e['user']['bbox']) == (NUM_IMAGES_PER_WSI + 1) * 4 # 4 is for x,y,w,h + + assert len(e['categories']) == len(mnist_labels) - 1 # exclude the default category + assert len(e['user']['confidence']) == (NUM_IMAGES_PER_WSI + 1) + + # compare e['values'] to labels['values'], to make sure we've 
trained a valid model + # the order of the values is shuffled in the annotation file, the ordering is in e['categories'] + file_num = int(file.split('Predictions_')[-1].split('.myanot')[0]) + predicted_labels = np.array([e['categories'][c]['label'] for c in e['values']]) + assert e['values'][0] == 0, "Background should have prediction 0" + matches = (predicted_labels == np.array([e['values'][0]] + list(map(str, labels[file_num]['value'])))) + similarity = matches.sum() / len(matches) + expected_min_accuracy = 0.75 + assert similarity > expected_min_accuracy, f"File {file}: Similarity between predicted values and GT is {similarity}, expected > {expected_min_accuracy}" + print(f"Similarity between predicted values and GT is {similarity}") diff --git a/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py b/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py new file mode 100644 index 0000000..5fc814f --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_gen_superpixels.py @@ -0,0 +1,164 @@ +import os +import shutil +import sys +import tempfile +from unittest.mock import MagicMock + +import h5py +import large_image +import numpy as np +import pytest +from PIL.Image import Image +from tifffile import tifffile + +# make pythonpath work out of the box - although your editor may complain +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from progress_helper import ProgressHelper +from tests.generate_MNIST_image import create_mnist_image + +from xdg_base_dirs import ( xdg_cache_home, ) + +NUM_IMAGES : int = 9 +IMAGE_SIZE : int = 16 # 16 is the smallest tile size for .TIFFs, although we could operate within a single tile, too. 
+COLOR_DIM = 3 + + +def d_to_rgb(d): + r = d & 0xFF + g = (d >> 8) & 0xFF + b = (d >> 16) & 0xFF + return [r, g, b] + +@pytest.fixture(scope="session") +def create_sample_data(): + ''' + Create a sample WSI for testing. + ''' + global NUM_IMAGES, IMAGE_SIZE + num_images = NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + output_filename = os.path.join(tmpdirname, "test.tiff") + + if os.path.dirname(output_filename): + os.makedirs(os.path.dirname(output_filename), exist_ok=True) + if os.path.exists(output_filename): + os.remove(output_filename) + + # Arrange the images in a grid (so num_images should be a number with an integer root) + tile_rows, tile_cols = int(np.sqrt(num_images)), int(np.sqrt(num_images)) + tile_h, tile_w = 16, 16 + grid_h, grid_w = tile_rows * tile_h, tile_cols * tile_w + base_image = np.zeros((grid_h, grid_w, 3), dtype=np.uint8) + + vals = np.array([0, 127, 255], dtype=np.uint8) + colors = np.stack(np.meshgrid(vals, vals, vals), axis=-1).reshape(-1, 3)[:NUM_IMAGES] + images = np.tile(colors[:, None, None, :], (1, IMAGE_SIZE, IMAGE_SIZE, 1)) + + for idx, img in enumerate(images): + r = idx // tile_cols + c = idx % tile_cols + base_image[r*tile_h:(r+1)*tile_h, c*tile_w:(c+1)*tile_w, :] = img + + pyramid = [base_image] + current = base_image.copy() + while min(current.shape) >= 64: + # Use Pillow to resize (ANTIALIAS gives good quality downsampling) + im = Image.fromarray(current) + new_w, new_h = current.shape[1] // 2, current.shape[0] // 2 + if new_w < 1 or new_h < 1: + break + im_resized = im.resize((new_w, new_h)) + current = np.array(im_resized) + pyramid.append(current) + + # Save the image as a pyramidal TIFF. + # The base image is the main image and the pyramid list (excluding the base) is saved as subIFDs. 
+ if os.path.dirname(output_filename): + os.makedirs(os.path.dirname(output_filename), exist_ok=True) + if os.path.exists(output_filename): + os.remove(output_filename) + + with tifffile.TiffWriter(output_filename, bigtiff=False) as tif: + tif.write(pyramid[0], + tile=(tile_w * 4, tile_h * 4), + photometric='RGB', + description='Whole-slide MNIST image at 40x magnification', + subifds=pyramid[1:]) + print(f"Pyramidal TIFF saved as {output_filename}") + + # we use yield so that the temporarydirectory is still open in the tests + yield output_filename, images + +def test_gen_superpixel(create_sample_data): + global IMAGE_SIZE, COLOR_DIM + test_image_pth, test_images = create_sample_data + base = SuperpixelClassificationBase() + + # Create test data + item = { + "_id": "test_item_id", + 'largeImage': {'fileId': 'test_image_id'}, + 'name': test_image_pth, + } + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + if not os.path.exists(os.path.join(dst, test_image_pth)): + shutil.copy(test_image_pth, dst) + print(">>> Copied file from", test_image_pth, "to", dst) + return None + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.getItem = MagicMock(return_value={'name': test_image_pth, 'largeImage': {'fileId': 'foobar'}}) + def mv_to_src(_, src): + dst = os.path.dirname(test_image_pth) + if not os.path.exists(os.path.join(dst, src)): + shutil.copy(src, dst) + print(">>> Copied file from", src, "to", dst) + return {'itemId': 'uploaded_item_id'} + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value={'_id': 'test_file_id'}) + #gc.uploadFileToFolder = MagicMock(return_value={'_id': 'test_file_id'}) + + #bboxes = [[x, y, w + x, y + h] for _, (x, y, w, h) in labels[['x', 'y', 'w', 'h']].iterrows()] + bboxes = [[x, x, x + IMAGE_SIZE, x + IMAGE_SIZE] for x in range(0, NUM_IMAGES, IMAGE_SIZE)] + + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items([item]) + result = 
base.createSuperpixelsForItem( + gc=gc, + annotationName="TorchTest", + item=item, + radius=IMAGE_SIZE, + magnification=40, + annotationFolderId='annotation_folder_id', + userId="user_id", + prog=prog, + ) + + out_pixelmap_file = os.path.join(os.path.dirname(test_image_pth), '%s.pixelmap.tiff' % item['name']) + assert os.path.exists(out_pixelmap_file), f"Output file {out_pixelmap_file} does not exist" + x, y, x2, y2 = 0, 0, IMAGE_SIZE, IMAGE_SIZE + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + # test that all values in orig_image is equal to 1 + # TODO: waiting for another PR: want this to be 1 + assert np.all(orig_image == 0) + + feature_img = test_images[-1] + x, y, x2, y2 = IMAGE_SIZE * (IMAGE_SIZE - 1), IMAGE_SIZE * (IMAGE_SIZE - 1), IMAGE_SIZE * IMAGE_SIZE, IMAGE_SIZE * IMAGE_SIZE + ts = large_image.getTileSource(test_image_pth) + orig_image = ts.getRegion( + region=dict(left=x, top=y, right=x2, bottom=y2), + format=large_image.tilesource.TILE_FORMAT_NUMPY + )[0] + orig_image = orig_image.astype(feature_img.dtype) + # TODO: same as TODO above + assert np.all(orig_image == NUM_IMAGES - 1) \ No newline at end of file diff --git a/superpixel_classification/SuperpixelClassification/tests/test_predict.py b/superpixel_classification/SuperpixelClassification/tests/test_predict.py new file mode 100644 index 0000000..9341a90 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_predict.py @@ -0,0 +1,254 @@ +import json +import os +import shutil +import tempfile +from unittest.mock import MagicMock + +import h5py +import numpy as np +import pytest +import torch + +# make pythonpath work out of the box - although your editor may complain +import sys +import os +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import 
SuperpixelClassificationBase +from SuperpixelClassificationTorch import SuperpixelClassificationTorch, _BayesianPatchTorchModel +from progress_helper import ProgressHelper +from tests.validate_json_annotation import validate_json_file + +# currently, torch model only supports 100x100 +MNIST_IMAGE_SIZE=100 +COLOR_DIM = 3 +NUM_IMAGES = 64 +CUTOFF_IMAGES = 2 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES, CUTOFF_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + h5_path = os.path.join(tmpdirname, "test_data.h5") + + images = np.random.randint(0, 255, size=(NUM_IMAGES - CUTOFF_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM), dtype=np.uint8) + indices = np.arange(NUM_IMAGES - CUTOFF_IMAGES) + assert images.shape[0] == indices.shape[0] + + with h5py.File(h5_path, 'w') as f: + f.create_dataset('images', data=images) + f.create_dataset('used_indices', data=indices, dtype='i') + + # we use yield so that the temporarydirectory is still open in the tests + yield h5_path + +''' +This test checks to predictions on a dataset that is only labeled with two values of out ten categories. 
+''' +def test_subset_labels(create_sample_data): + global NUM_IMAGES, CUTOFF_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTorch() + base.certainty = 'batchbald' + base.feature_is_image = True + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.uploadFileToItem = MagicMock() + + feature = { + '_id': '0', + 'name': 'my_test_feature' + } + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + annotrec = { + 'annotation': { + 'attributes': {}, + 'name': 'TorchTest', + }, + } + + # make a list with values 1 and 3 in a random order, and NUM_IMAGES entries + value_list = [1, 3] * (NUM_IMAGES // 2) + + elem = { + "type": "pixelmap", + "girderId": "6838aab654f0ca783ff03871", + "transform": {"matrix": [[1.0, 0], [0, 1.0]]}, + 'values': value_list, + 'categories' : [{"label": k, "fillColor": "rgba(0,0,0,0)"} for k in labels], + "boundaries": True, + "id": "myid", + 'user': { }, + } + + groups = { k: {"label": k, "fillColor": "rgba(0,0,0,0)", "strokeColor": "rgba(0,0,0,0)" } for k in labels } + + device = torch.device("cpu") + model = _BayesianPatchTorchModel(len(labels), device) + model.device = device + + items = [(feature, annotrec, elem)] + item = {'_id': 0, 'name': 'my_item', 'largeImage': {'fileId': 'test_image_id'}} + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + + annotation_name = 'testannotation' + with tempfile.TemporaryDirectory() as tmpdirname: + base.predictLabelsForItem( + gc=gc, + annotationName=annotation_name, + tempdir=tmpdirname, + model=model, + item=item, + annotrec=annotrec, + elem=elem, + feature=feature, + curEpoch=0, + userId='user_id', + labels=labels, + groups=groups, + makeHeatmaps=False, + radius=-1, + magnification=40.0, + certainty='batchbald', + batchSize=NUM_IMAGES, + use_cuda = False, + 
prog=prog, + ) + out_pth = os.path.join(tmpdirname, '%s Epoch 0 Predictions.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['values'][-i] == 0, "Expected unknown/none label for cutoff images" + assert len(e['categories']) == len(labels) + assert len(e['user']['confidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence'][0]) == len(labels) + assert len(e['user']['certainty']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['user']['certainty'][-i] > 10000, "Expected certainty to be very high for unlabeled samples to ensure they occur last in the AL filmstrip (DSA)" + assert 'percentiles' in e['user']['certainty_info'] + assert 'cdf' in e['user']['certainty_info'] + + validate_json_file(out_pth) + + out_pth = os.path.join(tmpdirname, '%s Epoch 1.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + annotation_file = json.load(f) + e = annotation_file['elements'][0] + assert len(e['values']) == NUM_IMAGES + assert len(e['categories']) == len(labels) + + validate_json_file(out_pth) + +def test_predict_unlabeled_with_cutoff(create_sample_data): + global NUM_IMAGES, CUTOFF_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase = SuperpixelClassificationTorch() + base.certainty = 'batchbald' + base.feature_is_image = True + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + gc.uploadFileToItem = MagicMock() + + feature = { + '_id': '0', + 'name': 'my_test_feature' + } + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + annotrec = { + 'annotation': { 
+ 'attributes': {}, + 'name': 'TorchTest', + }, + } + + elem = { + "type": "pixelmap", + "girderId": "6838aab654f0ca783ff03871", + "transform": {"matrix": [[1.0, 0], [0, 1.0]]}, + 'values': [0] * NUM_IMAGES, + 'categories' : [{"label": k, "fillColor": "rgba(0,0,0,0)"} for k in labels], + "boundaries": True, + "id": "myid", + 'user': { }, + } + + groups = { k: {"label": k, "fillColor": "rgba(0,0,0,0)", "strokeColor": "rgba(0,0,0,0)" } for k in labels } + + device = torch.device("cpu") + model = _BayesianPatchTorchModel(len(labels), device) + model.device = device + + items = [(feature, annotrec, elem)] + item = {'_id': 0, 'name': 'my_item', 'largeImage': {'fileId': 'test_image_id'}} + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + + annotation_name = 'testannotation' + with tempfile.TemporaryDirectory() as tmpdirname: + base.predictLabelsForItem( + gc=gc, + annotationName=annotation_name, + tempdir=tmpdirname, + model=model, + item=item, + annotrec=annotrec, + elem=elem, + feature=feature, + curEpoch=0, + userId='user_id', + labels=labels, + groups=groups, + makeHeatmaps=False, + radius=-1, + magnification=40.0, + certainty='batchbald', + batchSize=NUM_IMAGES, + use_cuda = False, + prog=prog, + ) + out_pth = os.path.join(tmpdirname, '%s Epoch 0 Predictions.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + pred_json = json.load(f) + e = pred_json['elements'][0] + assert len(e['values']) == NUM_IMAGES + for i in range(1, CUTOFF_IMAGES): + assert e['values'][-i] == 0, "Expected unknown/none label for cutoff images" + assert len(e['categories']) == len(labels) + assert len(e['user']['confidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence']) == NUM_IMAGES + assert len(e['user']['categoryConfidence'][0]) == len(labels) + assert len(e['user']['certainty']) == NUM_IMAGES + for i in range(1, 
CUTOFF_IMAGES): + assert e['user']['certainty'][-i] > 10000, "Expected certainty to be very high for unlabeled samples to ensure they occur last in the AL filmstrip (DSA)" + assert 'percentiles' in e['user']['certainty_info'] + assert 'cdf' in e['user']['certainty_info'] + + validate_json_file(out_pth) + + out_pth = os.path.join(tmpdirname, '%s Epoch 1.anot' % annotation_name) + assert os.path.exists(out_pth), "Output file %s does not exist" % out_pth + with open(out_pth, 'r') as f: + annotation_file = json.load(f) + e = annotation_file['elements'][0] + assert len(e['values']) == NUM_IMAGES + assert len(e['categories']) == len(labels) + + validate_json_file(out_pth) diff --git a/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py b/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py new file mode 100644 index 0000000..1a40365 --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_tensorflow.py @@ -0,0 +1,93 @@ +import os +import shutil +import tempfile +from unittest.mock import MagicMock + +import h5py +import numpy as np +import pytest + +# make pythonpath work out of the box - although your editor may complain +import sys +import os +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from SuperpixelClassificationTensorflow import SuperpixelClassificationTensorflow +from progress_helper import ProgressHelper + +MNIST_IMAGE_SIZE=28 +COLOR_DIM = 3 +NUM_IMAGES = 64 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + h5_path = os.path.join(tmpdirname, "test_data.h5") + images = np.random.randint(0, 255, size=(NUM_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM), dtype=np.uint8) + + with h5py.File(h5_path, 'w') as f: + f.create_dataset('images', data=images) + 
f.create_dataset('used_indices', data=np.arange(NUM_IMAGES - 2)) + + # we use yield so that that the temporarydirectory is still open in the tests + yield h5_path + +def test_train_model(create_sample_data): + global NUM_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase + base = SuperpixelClassificationTensorflow() + base.feature_is_image = True + base.certainty = 'not batchbald' # same as using tensorflow + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(_, src): + dst = os.path.dirname(os.path.dirname(h5_path)) + return shutil.copy(src, dst) + gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + elem = { + 'girderId': 'test_girder_id', + 'categories': [ + {"label": c} for c in labels + ], + 'values': + [] \ + + np.random.randint(1, len(labels) - 1, size=(NUM_IMAGES - 2), dtype=np.uint8).tolist() + + [0, 0], # last two images unlabeled + 'transform': {'matrix': [[1.0]]} + } + + item = {'_id': 'test_h5_file', 'name': 'test'} + annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}} + items = [(item, annotrec, elem)] + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + modelFile, modelTrainingFile = base.trainModel( + annotationName="TorchTest", + batchSize = 4, + epochs = 1, + excludeLabelList = [], + features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}}, + gc=gc, + itemsAndAnnot=items, + labelList = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], + modelFolderId="test_folder_id", + prog=prog, + randomInput = False, + trainingSplit = 0.5, + use_cuda = False, + ) + + assert os.path.exists(modelFile) + assert os.path.exists(modelTrainingFile) \ No newline at end of file diff --git 
a/superpixel_classification/SuperpixelClassification/tests/test_torch.py b/superpixel_classification/SuperpixelClassification/tests/test_torch.py new file mode 100644 index 0000000..edb7dbc --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/test_torch.py @@ -0,0 +1,94 @@ +import os +import shutil +import tempfile +from unittest.mock import MagicMock + +import h5py +import numpy as np +import pytest + +# make pythonpath work out of the box - although your editor may complain +import sys +import os +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.dirname(SCRIPT_DIR)) + +from SuperpixelClassificationBase import SuperpixelClassificationBase +from SuperpixelClassificationTorch import SuperpixelClassificationTorch +from progress_helper import ProgressHelper + +# currently, torch model only supports 100x100 +MNIST_IMAGE_SIZE=100 +COLOR_DIM = 3 +NUM_IMAGES = 64 + +@pytest.fixture(scope="session") +def create_sample_data(): + global NUM_IMAGES + with tempfile.TemporaryDirectory() as tmpdirname: + h5_path = os.path.join(tmpdirname, "test_data.h5") + images = np.random.randint(0, 255, size=(NUM_IMAGES, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE, COLOR_DIM), dtype=np.uint8) + + with h5py.File(h5_path, 'w') as f: + f.create_dataset('images', data=images) + f.create_dataset('used_indices', data=np.arange(NUM_IMAGES - 2)) + + # we use yield so that that the temporarydirectory is still open in the tests + yield h5_path + +def test_train_model(create_sample_data): + global NUM_IMAGES + h5_path = create_sample_data + base: SuperpixelClassificationBase + base = SuperpixelClassificationTorch() + base.feature_is_image = True + base.certainty = 'batchbald' # same as using torch + + # Mock girder client + gc = MagicMock() + def mv_to_dst(_, dst): + return shutil.copy(h5_path, dst) + gc.downloadFile = MagicMock(side_effect=mv_to_dst) + def mv_to_src(_, src): + dst = os.path.dirname(os.path.dirname(h5_path)) + return shutil.copy(src, dst) 
+ gc.uploadFileToFolder = MagicMock(side_effect=mv_to_src, return_value=True) + + labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + elem = { + 'girderId': 'test_girder_id', + 'categories': [ + {"label": c} for c in labels + ], + 'values': + [] \ + + np.random.randint(1, len(labels) - 1, size=(NUM_IMAGES - 2), dtype=np.uint8).tolist() + + [0, 0], # last two images unlabeled + 'transform': {'matrix': [[1.0]]} + } + + item = {'_id': 'test_h5_file', 'name': 'test'} + annotrec = {'_id': '1', '_version': 0, 'annotation': {'name': 'TorchTest'}} + items = [(item, annotrec, elem)] + with ProgressHelper( 'Superpixel Classification', + 'Test feature', False) as prog: + prog.progress(0) + prog.items(items) + modelFile, modelTrainingFile = base.trainModel( + annotationName="TorchTest", + batchSize = 4, + epochs = 1, + excludeLabelList = [], + features={'test_h5_file': {'_id': 'feature_id', 'name': 'test_h5_file'}}, + gc=gc, + itemsAndAnnot=items, + labelList = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], + modelFolderId="test_folder_id", + prog=prog, + randomInput = False, + trainingSplit = 0.5, + use_cuda = True, + ) + + assert os.path.exists(modelFile) + assert os.path.exists(modelTrainingFile) diff --git a/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py b/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py new file mode 100644 index 0000000..5f209ce --- /dev/null +++ b/superpixel_classification/SuperpixelClassification/tests/validate_json_annotation.py @@ -0,0 +1,588 @@ +#!/usr/bin/env python +''' +This code is similar to girder_annotation/girder_large_image_annotation/models/annotation.py +The meaning is to validate the json annotation file without having to use girder or large_image +''' +import argparse +import json +import logging +import os +import sys +import jsonschema +from tqdm import tqdm + +import copy + +def extendSchema(base, add): + extend = copy.deepcopy(base) + for key in 
add: + if key == 'required' and 'required' in base: + extend[key] = sorted(set(extend[key]) | set(add[key])) + elif key != 'properties' and 'properties' in base: + extend[key] = add[key] + if 'properties' in add: + extend['properties'].update(add['properties']) + return extend + + +colorSchema = { + 'type': 'string', + # We accept colors of the form + # #rrggbb six digit RRGGBB hex + # #rgb three digit RGB hex + # #rrggbbaa eight digit RRGGBBAA hex + # #rgba four digit RGBA hex + # rgb(255, 255, 255) rgb decimal triplet + # rgba(255, 255, 255, 1) rgba quad with RGB in the range [0-255] and + # alpha [0-1] + 'pattern': r'^(#([0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|' + r'rgb\(\d+,\s*\d+,\s*\d+\)|' + r'rgba\(\d+,\s*\d+,\s*\d+,\s*(\d?\.|)\d+\))$', +} + +transformArray = { + 'type': 'array', + 'items': { + 'type': 'array', + 'minItems': 2, + 'maxItems': 2, + }, + 'minItems': 2, + 'maxItems': 2, + 'description': 'A 2D matrix representing the transform of an ' + 'image overlay.', +} + + +colorRangeSchema = { + 'type': 'array', + 'items': colorSchema, + 'description': 'A list of colors', +} + +rangeValueSchema = { + 'type': 'array', + 'items': {'type': 'number'}, + 'description': 'A weakly monotonic list of range values', +} + +userSchema = { + 'type': 'object', + 'additionalProperties': True, +} + +labelSchema = { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + 'visibility': { + 'type': 'string', + # TODO: change to True, False, None? 
+ 'enum': ['hidden', 'always', 'onhover'], + }, + 'fontSize': { + 'type': 'number', + 'exclusiveMinimum': 0, + }, + 'color': colorSchema, + }, + 'required': ['value'], + 'additionalProperties': False, +} + +groupSchema = {'type': 'string'} + +baseElementSchema = { + 'type': 'object', + 'properties': { + 'id': { + 'type': 'string', + 'pattern': '^[0-9a-f]{24}$', + }, + 'type': {'type': 'string'}, + # schema free field for users to extend annotations + 'user': userSchema, + 'label': labelSchema, + 'group': groupSchema, + }, + 'required': ['type'], + 'additionalProperties': True, +} +baseShapeSchema = extendSchema(baseElementSchema, { + 'properties': { + 'lineColor': colorSchema, + 'lineWidth': { + 'type': 'number', + 'minimum': 0, + }, + }, +}) + + +pixelmapCategorySchema = { + 'type': 'object', + 'properties': { + 'fillColor': colorSchema, + 'strokeColor': colorSchema, + 'label': { + 'type': 'string', + 'description': 'A string representing the semantic ' + 'meaning of regions of the map with ' + 'the corresponding color.', + }, + 'description': { + 'type': 'string', + 'description': 'A more detailed explanation of the ' + 'meaining of this category.', + }, + }, + 'required': ['fillColor'], + 'additionalProperties': False, +} + +_annotationSchema = { + 'type': 'object', + 'properties': { + 'value': colorSchema, + 'id': colorSchema, + 'label': { + 'type': 'string', + 'description': 'A string representing the semantic ' + 'meaning of regions of the map with ' + 'the corresponding color.', + }, + 'description': { + 'type': 'string', + 'description': 'A more detailed explanation of the ' + 'meaining of this category.', + }, + }, + 'required': ['fillColor'], + 'additionalProperties': False, +} + + +overlaySchema = extendSchema(baseElementSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['image'], + }, + 'girderId': { + 'type': 'string', + 'pattern': '^[0-9a-f]{24}$', + 'description': 'Girder item ID containing the image to ' + 'overlay.', + }, + 
'opacity': { + 'type': 'number', + 'minimum': 0, + 'maximum': 1, + 'description': 'Default opacity for this image overlay. Must ' + 'be between 0 and 1. Defaults to 1.', + }, + 'hasAlpha': { + 'type': 'boolean', + 'description': + 'If true, the image is treated assuming it has an alpha ' + 'channel.', + }, + 'transform': { + 'type': 'object', + 'description': 'Specification for an affine transform of the ' + 'image overlay. Includes a 2D transform matrix, ' + 'an X offset and a Y offset.', + 'properties': { + 'xoffset': { + 'type': 'number', + }, + 'yoffset': { + 'type': 'number', + }, + 'matrix': transformArray, + }, + }, + }, + 'required': ['girderId', 'type'], + 'additionalProperties': False, + 'description': 'An image overlay on top of the base resource.', +}) + + +pixelmapSchema = extendSchema(overlaySchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['pixelmap'], + }, + 'values': { + 'type': 'array', + 'items': {'type': 'integer'}, + 'description': 'An array where the indices ' + 'correspond to pixel values in the ' + 'pixel map image and the values are ' + 'used to look up the appropriate ' + 'color in the categories property.', + }, + 'categories': { + 'type': 'array', + 'items': pixelmapCategorySchema, + 'description': 'An array used to map between the ' + 'values array and color values. ' + 'Can also contain semantic ' + 'information for color values.', + }, + 'boundaries': { + 'type': 'boolean', + 'description': 'True if the pixelmap doubles pixel ' + 'values such that even values are the ' + 'fill and odd values the are stroke ' + 'of each superpixel. 
If true, the ' + 'length of the values array should be ' + 'half of the maximum value in the ' + 'pixelmap.', + + }, + }, + 'required': ['values', 'categories', 'boundaries'], + 'additionalProperties': False, + 'description': 'A tiled pixelmap to overlay onto a base resource.', +}) + +bboxSchema = extendSchema(overlaySchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['bboxmap'], + }, + 'categories': { + 'type': 'array', + 'items': pixelmapCategorySchema, + 'description': 'An array used to map between the ' + 'values array and color values. ' + 'Can also contain semantic ' + 'information for color values.', + }, + 'annotations': { + 'type': 'array', + 'description': 'Value, id, and bounding box for each annotation', + 'items': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'value': { + 'type': 'integer', + }, + 'id': { + 'type': 'integer', + }, + 'bbox': { + 'type': 'array', + 'items': {'type': 'number'}, + 'minItems': 4, + 'maxItems': 4, + 'description': 'Bounding box in the form ' + '[left, top, right, bottom].', + }, + } + } + }, + 'boundaries': { + 'type': 'boolean', + 'description': 'True if the pixelmap doubles pixel ' + 'values such that even values are the ' + 'fill and odd values the are stroke ' + 'of each superpixel. If true, the ' + 'length of the values array should be ' + 'half of the maximum value in the ' + 'pixelmap.', + + }, + }, + 'required': ['categories', 'boundaries', 'annotations'], + 'additionalProperties': True, + 'description': 'A tiled pixelmap to overlay onto a base resource.', +}) + +annotationElementSchema = { + # Shape subtypes are mutually exclusive, so for efficiency, don't use + # 'oneOf' + 'anyOf': [ + pixelmapSchema, + bboxSchema, + ], +} + + +class AnnotationSchema: + annotationSchema = { + '$schema': 'http://json-schema.org/schema#', + 'type': 'object', + 'properties': { + 'name': { + 'type': 'string', + # TODO: Disallow empty? 
+ 'minLength': 1, + }, + 'description': {'type': 'string'}, + 'display': { + 'type': 'object', + 'properties': { + 'visible': { + 'type': ['boolean', 'string'], + 'enum': ['new', True, False], + 'description': 'This advises viewers on when the ' + 'annotation should be shown. If "new" (the default), ' + 'show the annotation when it is first added to the ' + "system. If false, don't show the annotation by " + 'default. If true, show the annotation when the item ' + 'is displayed.', + }, + }, + }, + 'attributes': { + 'type': 'object', + 'additionalProperties': True, + 'title': 'Image Attributes', + 'description': 'Subjective things that apply to the entire ' + 'image.', + }, + 'elements': { + 'type': 'array', + 'items': annotationElementSchema, + # We want to ensure unique element IDs, if they are set. If + # they are not set, we assign them from Mongo. + 'title': 'Image Markup', + 'description': 'Subjective things that apply to a ' + 'spatial region.', + }, + }, + 'additionalProperties': False, + } + + + + coordSchema = { + 'type': 'array', + # TODO: validate that z==0 for now + 'items': { + 'type': 'number', + }, + 'minItems': 3, + 'maxItems': 3, + 'name': 'Coordinate', + # TODO: define origin for 3D images + 'description': 'An X, Y, Z coordinate tuple, in base layer pixel ' + 'coordinates, where the origin is the upper-left.', + } + coordValueSchema = { + 'type': 'array', + 'items': { + 'type': 'number', + }, + 'minItems': 4, + 'maxItems': 4, + 'name': 'CoordinateWithValue', + 'description': 'An X, Y, Z, value coordinate tuple, in base layer ' + 'pixel coordinates, where the origin is the upper-left.', + } + + colorSchema = { + 'type': 'string', + # We accept colors of the form + # #rrggbb six digit RRGGBB hex + # #rgb three digit RGB hex + # #rrggbbaa eight digit RRGGBBAA hex + # #rgba four digit RGBA hex + # rgb(255, 255, 255) rgb decimal triplet + # rgba(255, 255, 255, 1) rgba quad with RGB in the range [0-255] and + # alpha [0-1] + # TODO: make rgb and rgba 
spec validate that rgb is [0-255] and a is + # [0-1], rather than just checking if they are digits and such. + 'pattern': r'^(#([0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|' + r'rgb\(\d+,\s*\d+,\s*\d+\)|' + r'rgba\(\d+,\s*\d+,\s*\d+,\s*(\d?\.|)\d+\))$', + } + + colorRangeSchema = { + 'type': 'array', + 'items': colorSchema, + 'description': 'A list of colors', + } + + rangeValueSchema = { + 'type': 'array', + 'items': {'type': 'number'}, + 'description': 'A weakly monotonic list of range values', + } + + userSchema = { + 'type': 'object', + 'additionalProperties': True, + } + + labelSchema = { + 'type': 'object', + 'properties': { + 'value': {'type': 'string'}, + 'visibility': { + 'type': 'string', + # TODO: change to True, False, None? + 'enum': ['hidden', 'always', 'onhover'], + }, + 'fontSize': { + 'type': 'number', + 'exclusiveMinimum': 0, + }, + 'color': colorSchema, + }, + 'required': ['value'], + 'additionalProperties': False, + } + + groupSchema = {'type': 'string'} + + baseElementSchema = { + 'type': 'object', + 'properties': { + 'id': { + 'type': 'string', + 'pattern': '^[0-9a-f]{24}$', + }, + 'type': {'type': 'string'}, + # schema free field for users to extend annotations + 'user': userSchema, + 'label': labelSchema, + 'group': groupSchema, + }, + 'required': ['type'], + 'additionalProperties': True, + } + baseShapeSchema = extendSchema(baseElementSchema, { + 'properties': { + 'lineColor': colorSchema, + 'lineWidth': { + 'type': 'number', + 'minimum': 0, + }, + }, + }) + + pointShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['point'], + }, + 'center': coordSchema, + 'fillColor': colorSchema, + }, + 'required': ['type', 'center'], + 'additionalProperties': False, + }) + + arrowShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['arrow'], + }, + 'points': { + 'type': 'array', + 'items': coordSchema, + 'minItems': 2, + 'maxItems': 2, + }, + 
'fillColor': colorSchema, + }, + 'description': 'The first point is the head of the arrow', + 'required': ['type', 'points'], + 'additionalProperties': False, + }) + + circleShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['circle'], + }, + 'center': coordSchema, + 'radius': { + 'type': 'number', + 'minimum': 0, + }, + 'fillColor': colorSchema, + }, + 'required': ['type', 'center', 'radius'], + 'additionalProperties': False, + }) + + polylineShapeSchema = extendSchema(baseShapeSchema, { + 'properties': { + 'type': { + 'type': 'string', + 'enum': ['polyline'], + }, + 'points': { + 'type': 'array', + 'items': coordSchema, + 'minItems': 2, + }, + 'fillColor': colorSchema, + 'closed': { + 'type': 'boolean', + 'description': 'polyline is open if closed flag is ' + 'not specified', + }, + 'holes': { + 'type': 'array', + 'description': + 'If closed is true, this is a list of polylines that are ' + 'treated as holes in the base polygon. These should not ' + 'cross each other and should be contained within the base ' + 'polygon.', + 'items': { + 'type': 'array', + 'items': coordSchema, + 'minItems': 3, + }, + }, + }, + 'required': ['type', 'points'], + 'additionalProperties': False, + }) + + +def validate_annotation(annotation_dict): + validator = jsonschema.Draft6Validator(AnnotationSchema.annotationSchema) + validatorElement = jsonschema.Draft6Validator(AnnotationSchema.baseElementSchema) + + validator.validate(annotation_dict) + for element in tqdm(annotation_dict['elements']): + validatorElement.validate(element) + +def validate_json_file(json_dst): + with open(json_dst, 'r') as f: + data = json.load(f) + validate_annotation(data) + # num_elem = len(data['elements'][0]['annotations']) + # if num_elem % 4 != 0: + # raise ValueError(f"Number of elements ({num_elem}) is not a multiple of 4") + # num_values = len(data['elements'][0]['annotations']) + # if int(num_elem / 4) != num_values: + # raise ValueError(f"Number 
of elements ({num_elem / 4}) does not match values ({num_values})") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Validate a json annotation file') + parser.add_argument('--input', default=os.path.join("out", "superpixel.anot"), type=str, + help='Name of input json file with a pixelmap annotation"') + args = parser.parse_args() + # Call the function with the filenames + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + + if not os.path.exists(args.input): + logging.error(f"Annotation path {args.input} does not exist") + sys.exit(1) + + validate_json_file(args.input) + logging.info("Done validating annotation ['%s']", args.input) From 3cde3a69d475fe4486363a1a10ea45c532b2bffa Mon Sep 17 00:00:00 2001 From: Anders Sildnes Date: Wed, 28 May 2025 10:16:40 -0500 Subject: [PATCH 4/5] Make girder client a parameter Easier for tests --- .../SuperpixelClassificationBase.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py index 1d86403..99fd569 100644 --- a/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py +++ b/superpixel_classification/SuperpixelClassification/SuperpixelClassificationBase.py @@ -886,19 +886,21 @@ def predictLabels(self, gc, folderId, annotationName, itemsAndAnnot, features, m radius, magnification, certainty, batchSize, prog) prog.progress(1) - def main(self, args): + def main(self, args, gc = None): self.feature_is_image = args.feature != 'vector' self.certainty = args.certainty print('\n>> CLI Parameters ...\n') pprint.pprint(vars(args)) - gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) - gc.token = args.girderToken - gc.authenticate('admin', 'password') - # dummy check to make sure we have access to server - if not [x for x in list(gc.listCollection()) if x['name'] == 
'Active Learning']: - raise Exception("Unable to authenticate with girder") + if gc is None: + gc = girder_client.GirderClient(apiUrl=args.girderApiUrl) + gc.token = args.girderToken + gc.authenticate('admin', 'password') + + # check to make sure we have access to server + if not [x for x in list(gc.listCollection()) if x['name'] == 'Active Learning']: + raise Exception("Unable to authenticate with girder") with ProgressHelper( 'Superpixel Classification', 'Superpixel classification', args.progress) as prog: From 9111264e8b7e0ed990dd0a7bc14874bac402bde6 Mon Sep 17 00:00:00 2001 From: Anders Sildnes Date: Tue, 3 Jun 2025 09:10:53 -0500 Subject: [PATCH 5/5] Add simple script to inspect feature files --- tools/inspect_image_feature_file.py | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tools/inspect_image_feature_file.py diff --git a/tools/inspect_image_feature_file.py b/tools/inspect_image_feature_file.py new file mode 100644 index 0000000..a93d911 --- /dev/null +++ b/tools/inspect_image_feature_file.py @@ -0,0 +1,37 @@ +''' +This script will open a feature file (.h5) and show a 3x3 grid of images. +This tool is useful if you suspect that features are not extracted properly, for example due to erroneous mask values/indexing. 
+'''
+
+import h5py
+import matplotlib.pyplot as plt
+import numpy as np
+import sys
+
+if len(sys.argv) > 1:  # sys.argv[0] is the script name, so a path was passed only if len > 1
+    feature_file = sys.argv[1]
+else:
+    feature_file = "features.h5"
+
+# open the file
+with h5py.File(feature_file, "r") as f:
+    # get the images dataset
+    images = f["images"]
+    # get the first 9 images (fewer if the file is smaller); the slice is read into memory
+    images = images[:9]
+    # reshape the images to 3x3
+    #images = np.reshape(images, (3,3,100,100,3))
+    # transpose the images to 3x3
+    #images = np.transpose(images, (0,2,1,3,4))
+    # flatten the images to 9x100x100x3
+    #images = np.reshape(images, (9,100,100,3))
+
+# plot the images, one per cell of the 3x3 grid
+for i, image in enumerate(images):
+    plt.subplot(3,3,i+1)
+    # hide the axis of this grid cell (only affects the subplot that was just created)
+    plt.axis('off')
+    plt.imshow(image)
+    print(f"Image {i+1} is {image.shape}")
+# show the whole grid once, after every cell has been drawn
+plt.show()