Skip to content

Commit

Permalink
Major Changes
Browse files Browse the repository at this point in the history
Added New Dataprocessors
  • Loading branch information
aibharata committed Jul 18, 2020
1 parent 3cfd2c1 commit 686e172
Show file tree
Hide file tree
Showing 26 changed files with 1,217 additions and 149 deletions.
1 change: 0 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
{
"python.pythonPath": "C:\\Python35\\python3.exe"
}
2 changes: 1 addition & 1 deletion medicalai/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@

__project__ = "medicalai"
__author__ = "Vinayaka Jyothi - For AiBharata"
__version__ = "1.1.59"
__version__ = "1.2.2-rc"
__license__ = "Apache"
4 changes: 3 additions & 1 deletion medicalai/chief/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@
from . import nnets
from . import model_metrics
from . import xai
from . import dataset_analysis as dataAnalyzer
from . import dataset_analysis as dataAnalyzer
from . import callbacks as callbacks
from . import dataloaders as dataloader
17 changes: 17 additions & 0 deletions medicalai/chief/callbacks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2020-2022 AIBharata Emerging Technologies Pvt. Ltd.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

from .custom_callbacks import *
109 changes: 109 additions & 0 deletions medicalai/chief/callbacks/custom_callbacks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from __future__ import absolute_import

import json
import os
import shutil

import numpy as np
from sklearn.metrics import roc_auc_score
from tensorflow.keras import backend as kb
from tensorflow.keras.callbacks import Callback

class AUROC_Callback(Callback):
    """Keras callback that prints the ROC AUC score at the end of every epoch.

    The score is computed with ``sklearn.metrics.roc_auc_score`` from the
    model's predictions on ``generator`` and the ``labels`` attribute of
    that same generator.

    # Arguments
        generator: Data source handed to ``model.predict``; it must also
            expose a ``labels`` attribute holding the ground truth.
        workers: Forwarded as ``workers`` to ``model.predict``.
    """

    def __init__(self, generator, workers=1):
        super().__init__()
        self.generator = generator
        self.workers = workers

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the stored generator and print the resulting AUROC."""
        predictions = self.model.predict(self.generator, workers=self.workers)
        targets = self.generator.labels
        print(' - mAUROC:', roc_auc_score(targets, predictions))

class MultipleClassAUROC(Callback):
    """Track per-class and mean AUROC on a sequence after every epoch.

    Whenever the mean AUROC beats the best value recorded in ``stats``, the
    callback:

    1. copies ``weights_path`` to ``best_<file name>`` in the same directory,
    2. appends a line to ``best_auroc.log`` in that directory,
    3. writes ``stats`` to ``.training_stats.json`` in that directory so a
       later run can resume from it.

    NOTE(review): the file at ``weights_path`` must already exist when the
    copy happens; presumably a checkpoint callback placed earlier in the
    callback list writes it -- confirm against the caller.

    Sample Usage:
        auroc = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )

    # Arguments
        sequence: Data source passed to ``model.predict``; it must also
            provide ``get_y_true()`` returning the ground-truth labels.
        class_names: Ordered class names; index ``i`` selects column ``i``
            of both the predictions and the ground truth.
        weights_path: Path of the weights file to copy on improvement.
        stats: Optional dict from a previous run (for resuming training).
            A falsy value starts from ``{"best_mean_auroc": 0}``.
        workers: Forwarded as ``workers`` to ``model.predict``.
    """

    def __init__(self, sequence, class_names, weights_path, stats=None, workers=1):
        # ``super(Callback, self)`` would skip ``Callback.__init__`` itself,
        # leaving the base-class attributes uninitialised.
        super().__init__()
        self.sequence = sequence
        self.workers = workers
        self.class_names = class_names
        self.weights_path = weights_path

        # All artefacts are written next to the weights file.
        output_dir, weights_file = os.path.split(weights_path)
        self.best_weights_path = os.path.join(
            output_dir, "best_{}".format(weights_file)
        )
        self.best_auroc_log_path = os.path.join(output_dir, "best_auroc.log")
        self.stats_output_path = os.path.join(output_dir, ".training_stats.json")

        # for resuming previous training
        self.stats = stats if stats else {"best_mean_auroc": 0}
        # A resumed stats dict without the key would otherwise raise KeyError
        # on the first comparison in ``on_epoch_end``.
        self.stats.setdefault("best_mean_auroc", 0)

        # aurocs log: one list of per-epoch scores for each class
        self.aurocs = {name: [] for name in self.class_names}

    def on_epoch_end(self, epoch, logs=None):
        """
        Calculate the average AUROC and save the best model weights according
        to this metric.

        # Arguments
            epoch: Zero-based epoch index (printed and logged as ``epoch + 1``).
            logs: Unused; accepted for Keras callback compatibility.
        """
        print("\n*********************************")
        self.stats["lr"] = float(kb.eval(self.model.optimizer.lr))
        print("current learning rate: {}".format(self.stats['lr']))

        # Both arrays are indexed as [:, i] below, i.e. one column per class.
        y_hat = self.model.predict(self.sequence, workers=self.workers)
        y = self.sequence.get_y_true()

        print("*** epoch#{} dev auroc ***".format(epoch + 1))
        current_auroc = []
        for i, class_name in enumerate(self.class_names):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
            except ValueError:
                # Deliberate best-effort fallback kept from the original code:
                # a class that cannot be scored counts as 0.
                score = 0
            self.aurocs[class_name].append(score)
            current_auroc.append(score)
            print("{}. {}: {}".format(i + 1, class_name, score))
        print("*********************************")

        # customize your multiple class metrics here
        mean_auroc = np.mean(current_auroc)
        print("mean auroc: {}".format(mean_auroc))
        if mean_auroc > self.stats["best_mean_auroc"]:
            print("update best auroc from {} to {}".format(
                self.stats['best_mean_auroc'], mean_auroc))

            # 1. copy best model
            shutil.copy(self.weights_path, self.best_weights_path)

            # 2. update log file
            print("update log file: {}".format(self.best_auroc_log_path))
            with open(self.best_auroc_log_path, "a") as f:
                f.write("(epoch#{}) auroc: {}, lr: {}\n".format(
                    epoch + 1, mean_auroc, self.stats['lr']))

            # 3. write stats output, this is used for resuming the training.
            # Record the new best *before* dumping so the file on disk holds
            # the current best rather than the previous one; cast to a plain
            # float so the value is JSON serialisable.
            self.stats["best_mean_auroc"] = float(mean_auroc)
            with open(self.stats_output_path, 'w') as f:
                json.dump(self.stats, f)

            print("update model file: {} -> {}".format(
                self.weights_path, self.best_weights_path))
            print("*********************************")
        return

94 changes: 63 additions & 31 deletions medicalai/chief/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,13 @@
from .model_metrics import *
from .xai import *
from .uFuncs import *
from albumentations import Compose
import albumentations.augmentations.transforms as augmentations

physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices)>1:
MULTI_GPU_MODE= True
print('[INFO]: Medicalai activated with MultiGPU Mode')
else:
MULTI_GPU_MODE= False
GPU_to_Use = 'all'
Expand Down Expand Up @@ -196,7 +199,7 @@ def train( model, x_train,
class_weights = None,
saveBestModel = False, bestModelCond = None,
validation_data = None, TRAIN_STEPS = None, TEST_STEPS = None,
verbose=None, y_train=None,
verbose=None, y_train=None, workers = 1
):
if callbacks is not None:
if ('tensorboard'in callbacks):
Expand Down Expand Up @@ -224,7 +227,8 @@ def train( model, x_train,
epochs=epochs,
validation_data=validation_data,
callbacks=callbacks,
class_weight = class_weights
class_weight = class_weights,
workers =workers
)
else:
result = model.fit(x_train,
Expand All @@ -233,7 +237,8 @@ def train( model, x_train,
validation_data=validation_data,
callbacks=callbacks,
verbose = verbose,
class_weight = class_weights
class_weight = class_weights,
workers = workers
)
return result.history

Expand Down Expand Up @@ -492,7 +497,7 @@ def preprocessor_from_meta(self, metaFile=None):
self.labelNames = self.preProcessor.labels

#@timeit
def predict(self, input, verbose=0):
def predict(self, input, verbose=1, safe=False , workers= 1):
"""
Perform prediction on Input. Input can be Numpy Array or Image or Data Generator (in case of Test/Validation).
Expand All @@ -519,24 +524,34 @@ def predict(self, input, verbose=0):
# Returns
Numpy.Array: of Predictions. Shape of Output [Number of Inputs, Number of Output Classes in Model]
"""
if hasattr(input, 'generator') and hasattr(input, 'STEP_SIZE'):
return self.model.predict(input.generator, verbose=1)
elif hasattr(input, 'image_data_generator'):
return self.model.predict(input, verbose=1)
elif hasattr(input, 'data') and not isinstance(input,np.ndarray):
return self.model.predict(input.data, verbose=verbose)
if hasattr(self, 'workers'):
workers = self.workers
else:
if self.preProcessor is not None:
input = self.preProcessor.processImage(input)
return self.model.predict(input, verbose=verbose)
workers = workers
if safe:
if hasattr(input, 'generator') and hasattr(input, 'STEP_SIZE'):
return self.model.predict(input.generator, steps=input.STEP_SIZE, verbose=1, workers=workers)
elif hasattr(input, 'image_data_generator'):
return self.model.predict(input, steps =(input.n/input.batch_size), verbose=1, workers=workers)
else:
if hasattr(input, 'generator') and hasattr(input, 'STEP_SIZE'):
return self.model.predict(input.generator, verbose=1, workers=workers)
elif hasattr(input, 'image_data_generator'):
return self.model.predict(input, verbose=1, workers=workers)
elif hasattr(input, 'data') and not isinstance(input,np.ndarray):
return self.model.predict(input.data, verbose=verbose, workers=workers)
else:
if self.labelNames is None:
if hasattr(input, 'labelNames'):
self.labelNames = input.labelNames if self.labelNames is None else self.labelNames
if isinstance(input,np.ndarray):
return self.model.predict(input, verbose=verbose)
if self.preProcessor is not None:
input = self.preProcessor.processImage(input)
return self.model.predict(input, verbose=verbose, workers=workers)
else:
return self.model.predict(input, verbose=verbose)
if self.labelNames is None:
if hasattr(input, 'labelNames'):
self.labelNames = input.labelNames if self.labelNames is None else self.labelNames
if isinstance(input,np.ndarray):
return self.model.predict(input, verbose=verbose, workers=workers)
else:
return self.model.predict(input, verbose=verbose, workers=workers)

#@timeit
def predict_pipeline(self, input):
Expand Down Expand Up @@ -670,7 +685,7 @@ def summary(self):
"""
return self.model.summary()

def generate_evaluation_report(self, testSet = None, predictions = None, printStat = False,returnPlot = False, showPlot= False, pdfName =None, **kwargs):
def generate_evaluation_report(self, testSet = None, predictions = None, printStat = True,returnPlot = False, showPlot= False, pdfName =None, **kwargs):
"""
Generate a comprehensive PDF report with model sensitivity, specificity, accuracy, confidence intervals,
ROC Curve Plot, Precision Recall Curve Plot, and Confusion Matrix Plot for each class.
Expand Down Expand Up @@ -800,12 +815,12 @@ class TRAIN_ENGINE(INFERENCE_ENGINE):
"""
def __init__(self, modelName=None):
super().__init__(modelName)

def train_and_save_model(self,AI_NAME, MODEL_SAVE_NAME, trainSet, testSet, OUTPUT_CLASSES, RETRAIN_MODEL, EPOCHS,
BATCH_SIZE=32, LEARNING_RATE=0.0001, convLayers=None,SAVE_BEST_MODEL=False, BEST_MODEL_COND=None,
callbacks=None, loss = 'sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(lr=0.0001),
metrics = ['accuracy'], showModel = False,
CLASS_WEIGHTS=None, **kwargs):
metrics = ['accuracy'], showModel = False, workers = 1,
CLASS_WEIGHTS=None, **kwargs,):
""""
Main function that trains and saves a model. This automatically builds new model for given networks/AI or reload existing AI model.
This function can be used to retrain existing models or create new models.
Expand Down Expand Up @@ -880,51 +895,68 @@ def train_and_save_model(self,AI_NAME, MODEL_SAVE_NAME, trainSet, testSet, OUTPU
None: On successful completion saves the trained model.
"""
self.workers = workers
self.testSet = testSet
self.modelName = MODEL_SAVE_NAME
self.test_predictions = None
global MULTI_GPU_MODE, GPU_to_Use
if hasattr(trainSet, 'data'):
self.labelNames = trainSet.labelNames
if MULTI_GPU_MODE and GPU_to_Use.lower()=='all':
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
self.model = modelManager(AI_NAME= AI_NAME, convLayers= convLayers, modelName = MODEL_SAVE_NAME, x_train = trainSet.data, OUTPUT_CLASSES = OUTPUT_CLASSES, RETRAIN_MODEL= RETRAIN_MODEL)
self.model.compile(optimizer=optimizer,loss=loss,metrics=metrics)
BATCH_SIZE *= mirrored_strategy.num_replicas_in_sync
#BATCH_SIZE *= mirrored_strategy.num_replicas_in_sync
else:
self.model = modelManager(AI_NAME= AI_NAME, convLayers= convLayers, modelName = MODEL_SAVE_NAME, x_train = trainSet.data, OUTPUT_CLASSES = OUTPUT_CLASSES, RETRAIN_MODEL= RETRAIN_MODEL)
self.model.compile(optimizer=optimizer,loss=loss,metrics=metrics)
print(self.model.summary()) if showModel else None
print('[INFO]: BATCH_SIZE -',BATCH_SIZE)
self.result = train(self.model, trainSet.data, y_train= trainSet.labels, batch_size=BATCH_SIZE, epochs=EPOCHS,
validation_data=(testSet.data, testSet.labels), callbacks=callbacks, saveBestModel= SAVE_BEST_MODEL,
bestModelCond = BEST_MODEL_COND, TRAIN_STEPS = None, TEST_STEPS = None,
bestModelCond = BEST_MODEL_COND, TRAIN_STEPS = None, TEST_STEPS = None, workers = self.workers,
class_weights=CLASS_WEIGHTS)#['tensorboard'])
#self.model.evaluate(testSet.data, testSet.labels)

dataprc.metaSaver(trainSet.labelMap, trainSet.labelNames, normalize=trainSet.normalize,
rescale =None,
network_input_dim =trainSet.network_input_dim, samplingMethodName=trainSet.samplingMethodName, outputName= MODEL_SAVE_NAME)
else:
networkDim = np.zeros((1,)+trainSet.generator.image_shape)
from tensorflow.python.data.ops.dataset_ops import PrefetchDataset
if isinstance(trainSet.generator, PrefetchDataset):
for f,l in trainSet.generator.take(1):
inpSize = f.numpy().shape
networkDim = np.zeros((1,)+inpSize[1:])
networkInputSize = inpSize[1:]
rescaleValue = 1./255
else:
networkDim = np.zeros((1,)+trainSet.generator.image_shape)
networkInputSize = trainSet.generator.image_shape
try:
rescaleValue = trainSet.generator.image_data_generator.rescale
except:
rescaleValue = 1./255

self.labelNames = dataprc.safe_labelmap_converter(trainSet.labelMap)
if MULTI_GPU_MODE and GPU_to_Use.lower()=='all':
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
mirrored_strategy = tf.distribute.MirroredStrategy()
self.model = modelManager(AI_NAME= AI_NAME, modelName = MODEL_SAVE_NAME, x_train = networkDim, OUTPUT_CLASSES = OUTPUT_CLASSES, RETRAIN_MODEL= RETRAIN_MODEL, **kwargs)
self.model.compile(optimizer=optimizer,loss=loss,metrics=metrics)
BATCH_SIZE *= mirrored_strategy.num_replicas_in_sync
else:
self.model = modelManager(AI_NAME= AI_NAME, modelName = MODEL_SAVE_NAME, x_train = networkDim, OUTPUT_CLASSES = OUTPUT_CLASSES, RETRAIN_MODEL= RETRAIN_MODEL, **kwargs)
self.model.compile(optimizer=optimizer,loss=loss,metrics=metrics)
print(self.model.summary()) if showModel else None
print('[INFO]: BATCH_SIZE -',BATCH_SIZE)
self.result = train(self.model, trainSet.generator, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=testSet.generator, callbacks=callbacks, saveBestModel= SAVE_BEST_MODEL, bestModelCond = BEST_MODEL_COND, TRAIN_STEPS = trainSet.STEP_SIZE, TEST_STEPS = testSet.STEP_SIZE, verbose=1,class_weights=CLASS_WEIGHTS)#['tensorboard'])
self.result = train(self.model, trainSet.generator, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=testSet.generator,
callbacks=callbacks, saveBestModel= SAVE_BEST_MODEL, bestModelCond = BEST_MODEL_COND, TRAIN_STEPS = trainSet.STEP_SIZE,
TEST_STEPS = testSet.STEP_SIZE, verbose=1,class_weights=CLASS_WEIGHTS, workers = self.workers
)
#self.model.evaluate(testSet.generator,steps = testSet.STEP_SIZE)
dataprc.metaSaver(trainSet.labelMap, self.labelNames, normalize= None,
rescale = trainSet.generator.image_data_generator.rescale,
network_input_dim =trainSet.generator.image_shape, samplingMethodName=None, outputName= MODEL_SAVE_NAME)
rescale = rescaleValue,
network_input_dim =networkInputSize, samplingMethodName=None, outputName= MODEL_SAVE_NAME)

save_model_and_weights(self.model, outputName= MODEL_SAVE_NAME)

Expand Down
20 changes: 20 additions & 0 deletions medicalai/chief/dataloaders/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2020-2022 AIBharata Emerging Technologies Pvt. Ltd.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

from .data_utils import *
from .dataset_processors import *
from .image_sequences import *
from .tf_image_pipelines import *
Loading

0 comments on commit 686e172

Please sign in to comment.