diff --git a/pycoal/mineral.py b/pycoal/mineral.py index 9f7f8469..a4a5fc60 100644 --- a/pycoal/mineral.py +++ b/pycoal/mineral.py @@ -24,6 +24,7 @@ import pycoal from pycoal.resampler import create_resampling_matrix +from pycoal import parallel import spectral import time import fnmatch @@ -86,9 +87,95 @@ def calculate_pixel_confidence_value(pixel, angles_m, resampling_matrix): calling function but are optionals and may vary from one classifier to another. """ -# to be merged -def SAM_pytorch(): - pass +def SAM_pytorch(image_file_name, classified_file_name, library_file_name, + scores_file_name=None, class_names=None, threshold=0.0, + in_memory=False, subset_rows=None, subset_cols=None): + + # load and optionally subset the spectral library + library = spectral.open_image(library_file_name) + if class_names is not None: + library = pycoal.mineral.MineralClassification.subset_spectral_library( + library, class_names) + + # open the image + image = spectral.open_image(image_file_name) + if subset_rows is not None and subset_cols is not None: + data = SubImage(image, subset_rows, subset_cols) + m = subset_rows[1] + n = subset_cols[1] + else: + if in_memory: + data = image.load() + else: + data = image.asarray() + m = image.shape[0] + n = image.shape[1] + + logging.info("Classifying a %iX%i image" % (m, n)) + + # define a resampler + # TODO detect and scale units + # TODO band resampler should do this + resampling_matrix = create_resampling_matrix( + [x / 1000 for x in image.bands.centers], library.bands.centers) + + # turn the data object into a tensor + data = numpy.array(data) + # float 64 + data = data.astype(numpy.dtype('f8')) + data = torch.from_numpy(data) + # allocate a zero-initialized MxN array for the classified image + classified = torch.zeros(shape=(m, n), dtype=numpy.uint16) + scored = torch.zeros(shape=(m, n), dtype=numpy.float64) + + # universal calculations for angles + # Adapted from Spectral library + ang_m = numpy.array(library.spectra, dtype=numpy.float64) + ang_m /= numpy.sqrt(numpy.einsum('ij,ij->i', ang_m, ang_m))[:, numpy.newaxis] + + # Create data loader class that will return range of data to model + # dataSet = ImageDataSet(m, x, data, classified, scored) + + classified, scored = parallel.data_parallel(resampling_matrix, data, classified, scored) + + classified = classified + 1 + + # Turns variables back to numpy + classified = classified.numpy() + scored = scored.numpy() + + indices = numpy.where(scored[:][:] <= threshold) + + classified[indices] = 0 + scored[indices] = 0 + + # save the classified image to a file + spectral.io.envi.save_classification(classified_file_name, classified, + class_names=['No data'] + + library.names, + metadata={'data ignore value': 0, + 'description': 'COAL ' + + pycoal.version + ' ' + 'mineral classified ' + 'image.', + 'map info': + image.metadata.get( + 'map info')}) + + # remove unused classes from the image + pycoal.mineral.MineralClassification.filter_classes(classified_file_name) + + if scores_file_name is not None: + # save the scored image to a file + spectral.io.envi.save_image(scores_file_name, scored, + dtype=numpy.float64, + metadata={'data ignore value': -50, + 'description': 'COAL ' + + pycoal.version + ' mineral ' + 'scored image.', + 'map info': image.metadata.get( + 'map info')}) + def SAM_joblib(image_file_name, classified_file_name, library_file_name, diff --git a/pycoal/parallel.py b/pycoal/parallel.py new file mode 100644 index 00000000..6ff480c1 --- /dev/null +++ b/pycoal/parallel.py @@ -0,0 +1,134 @@ +''' + Module Dependencies + torch: + torch.nn + Dataset + Dataloader +''' +import torch +import torch.nn as nn +from torch.autograd import Function + +import pycoal +from torch.utils.data.dataset import Dataset +import numpy +#from torch.utils.data import Dataloader + +import spectral +import time +import fnmatch +import shutil +import math +import sys +import os + +''' + Custom function that will perform the operations on given pixel +''' +class LinearFunction(Function): + ''' + ctx is a context object that is typically used to save data that will be needed for the backward + pass, but in this application it is used to store needed data like: + 1. Given data + 2. Given resampling_matrix + ''' + @staticmethod + def forward(x, y, data, resampling_matrix, classified, scored): + + # read the pixel from the file + pixel = data[x, y] + + # if it is not a no data pixel + if not numpy.isclose(pixel[0], -0.005) and not pixel[0] == -50: + + # resample the pixel ignoring NaNs from target bands that + # don't overlap + + # TODO fix spectral library so that bands are in order + resample_data = numpy.einsum('ij,j->i', resampling_matrix, pixel) + resample_data = numpy.nan_to_num(resample_data) + + # calculate spectral angles + # Adapted from Spectral library + norms = numpy.sqrt(numpy.einsum('i,i->', resample_data, resample_data)) + dots = numpy.einsum('i,ji->j', resample_data, ang_m) + dots = numpy.clip(dots / norms, -1, 1) + angles = numpy.arccos(dots) + + # normalize confidence values from [pi,0] to [0,1] + angles = 1 - angles / math.pi + + # get index of class with largest confidence value + classified[x, y] = numpy.argmax(angles) + + # get confidence value of the classified pixel + scored[x, y] = angles[classified[x, y]] + + return classified[x,y], scored[x,y] + + # This is typically for the gradient formula. Since we don't need a gradient for this project, we will be returning None + @staticmethod + def backward(): + return None + + +''' + +''' +def data_parallel(resampling_matrix, data, classified, scored): + # Get the number of available GPUs on device + deviceNum = torch.cuda.device_count() + # Create model + model = ImageParallelModel() + + # Check if more than one GPU is available + if deviceNum > 1: + model = nn.DataParallel(mode, device_ids=range(0,deviceNum)) + + return model(resmapling_matrix, data, classified, scored) + +''' + Data Model + Purpose: To utilize torch's parallelization modules for processing images + Input: input size (input_size), output size (output_size) + Output: +''' +class ImageParallelModel(nn.Module): + ''' + This is where all used models are instantiated + ''' + def __init__(self): + super(ImageParallelModel, self).__init__() + self.func = LinearFunction() + + def forward(self, resampling_matrix, data, classified, scored): + for x in range(0, data.get_shape()[0].value): + for y in range(0, data.get_shape()[1].value): + classified[x,y], scored[x,y] = self.func(x,y,data,resampling_matrix, classified, scored) + + return classified, scored + + +''' + Dataset class +''' +class ImageDataSet(Dataset): + def __init__(self, x, y, image, classified, scored): + self.x = x + self.y = y + self.image = image + self.classified = classified + self.scored = scored + self.resample = resample + + return + def __len__(self): + return self.x + # Returns tensor column + def __get__(self, x,y): + data = torch.narrow(self.image, 0, x, y - x) + classified = torch.narrow(self.classified, 0, x, y - x) + scored = torch.narrow(self.scored, 0, x, y - x) + + return (data, classified, scored) +