diff --git a/Analysis.py b/Analysis.py index 22c6d98..1b4fc86 100644 --- a/Analysis.py +++ b/Analysis.py @@ -1,75 +1,48 @@ # Analysis import numpy as np +import matplotlib as mpl +mpl.use('pdf') +import matplotlib.pyplot as plt +from sklearn.metrics import roc_curve, auc -def ClassificationAnalysis(MyModel,Test_X,Test_Y,BatchSize, SignalClassIndex=5): - import matplotlib as mpl - mpl.use('pdf') - import matplotlib.pyplot as plt +mpColors = ['red', 'darkorange', 'lawngreen', 'green', 'lightseagreen', 'cyan', 'royalblue', 'blue', 'blueviolet', 'magenta', 'hotpink'] - from sklearn.metrics import roc_curve, auc +def ClassificationAnalysis (MyModel, Test_X, Test_Y, BatchSize, SignalClassIndex=5): + result = MyModel.Model.predict (Test_X, batch_size=BatchSize) - print "Prediction Analysis." - result = MyModel.Model.predict(Test_X, batch_size=BatchSize) - - fpr, tpr, _ = roc_curve(Test_Y[:,SignalClassIndex], - result[:,SignalClassIndex]) - roc_auc = auc(fpr, tpr) - - lw=2 - - plt.plot(fpr,tpr,color='darkorange', - lw=lw, label='ROC curve (area = %0.2f)' % roc_auc) - - print "ROC AUC: ",roc_auc - - plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.05]) - - plt.xlabel('False Positive Rate') - plt.ylabel('True Positive Rate') - - plt.legend(loc="lower right") + fpr, tpr, _ = roc_curve (Test_Y[:,SignalClassIndex], + result[:,SignalClassIndex]) + roc_auc = auc (fpr, tpr) - plt.savefig(MyModel.OutDir+"/ROC.pdf") - - -mpColors=["blue","green","red","cyan","magenta","yellow","black","white"] + lw=2 -def MultiClassificationAnalysis(MyModel,Test_X,Test_Y,BatchSize): - import matplotlib as mpl - mpl.use('pdf') - import matplotlib.pyplot as plt + plt.plot (fpr,tpr,color=mpColors[SignalClassIndex], + lw=lw, label='ROC curve (area = %0.2f)' % roc_auc) - from sklearn.metrics import roc_curve, auc + print 'ROC', SignalClassIndex, 'AUC:', roc_auc + pass - print "Prediction Analysis." 
- result = MyModel.Model.predict(Test_X, batch_size=BatchSize) - - NClasses=Test_Y.shape[1] - for ClassIndex in xrange(0,NClasses): - fpr, tpr, _ = roc_curve(Test_Y[:,ClassIndex], - result[:,ClassIndex]) - roc_auc = auc(fpr, tpr) +def MultiClassificationAnalysis (MyModel, Test_X, Test_Y, BatchSize): + print 'Prediction Analysis.' + result = MyModel.Model.predict (Test_X, batch_size=BatchSize) - lw=2 + NClasses = Test_Y.shape[1] - plt.plot(fpr,tpr,color=mpColors[ClassIndex], - lw=lw, label='ROC curve (area = %0.2f)' % roc_auc) + for ClassIndex in xrange(NClasses): + ClassificationAnalysis (MyModel, Test_X, Test_Y, BatchSize, ClassIndex) + pass - print "ROC ",ClassIndex," AUC: ",roc_auc + lw=2 - plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.05]) + plt.plot ([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') + plt.xlim ([0.0, 1.0]) + plt.ylim ([0.0, 1.05]) - plt.xlabel('False Positive Rate') - plt.ylabel('True Positive Rate') + plt.xlabel ('False Positive Rate') + plt.ylabel ('True Positive Rate') - plt.legend(loc="lower right") + plt.legend (loc='lower right') - - plt.savefig(MyModel.OutDir+"/ROC.pdf") - + plt.savefig (MyModel.OutDir + '/ROC.pdf') return result diff --git a/Arguments.py b/Arguments.py index 901087d..11baa02 100644 --- a/Arguments.py +++ b/Arguments.py @@ -1,49 +1,47 @@ +import argparse + # Configuration of this job parser = argparse.ArgumentParser() -parser.add_argument('-C', '--config',default="EventClassificationDNN/ScanConfig.py") -parser.add_argument('-I', '--inputdata',default="/data/afarbin/crogan/h5/mP1000_mC200_mX100.h5") -parser.add_argument('-L', '--LoadModel',default=False) -parser.add_argument('--gpu', dest='gpuid', default="") -parser.add_argument('--cpu', action="store_true") -parser.add_argument('--NoTrain', action="store_true") -parser.add_argument('-s',"--hyperparamset", default="0") -parser.add_argument('-v',"--varset", default="0") -parser.add_argument('--NoResults', 
action="store_false") +parser.add_argument ('-C', '--config', default='EventClassificationDNN/ScanConfig.py') +parser.add_argument ('-I', '--inputdata', default='/data/afarbin/crogan/h5/mP1000_mC200_mX100.h5') +parser.add_argument ('-L', '--LoadModel', default=False) +parser.add_argument ('--gpu', dest='gpuid', default='') +parser.add_argument ('--cpu', action='store_true') +parser.add_argument ('--NoTrain', action='store_true') +parser.add_argument ('-s', '--hyperparamset', type=int, default=0) +parser.add_argument ('-v', '--varset', type=int, default=0) +parser.add_argument ('--NoResults', action='store_false') args = parser.parse_args() -UseGPU=not args.cpu -gpuid=args.gpuid -if args.hyperparamset: - HyperParamSet = int(args.hyperparamset) - -if args.varset: - VarSet = int(args.varset) - - print "Using VarSet: ",VarSet +UseGPU = not args.cpu +gpuid = args.gpuid +HyperParamSet = args.hyperparamset -ConfigFile=args.config -InputData=args.inputdata +VarSet = args.varset +print 'Using VarSet:', VarSet -LoadModel=args.LoadModel +ConfigFile = args.config +InputData = args.inputdata +LoadModel = args.LoadModel # Configuration from PBS: -if "PBS_ARRAYID" in os.environ: - HyperParamSet = int(os.environ["PBS_ARRAYID"]) +if 'PBS_ARRAYID' in os.environ: HyperParamSet = int(os.environ['PBS_ARRAYID']) -if "PBS_QUEUE" in os.environ: - if "cpu" in os.environ["PBS_QUEUE"]: - UseGPU=False - if "gpu" in os.environ["PBS_QUEUE"]: - UseGPU=True - gpuid=int(os.environ["PBS_QUEUE"][3:4]) +if 'PBS_QUEUE' in os.environ: + if 'cpu' in os.environ['PBS_QUEUE']: UseGPU = False + if 'gpu' in os.environ['PBS_QUEUE']: + UseGPU = True + gpuid = int(os.environ['PBS_QUEUE'][3:4]) + pass + pass if UseGPU: - print "Using GPU",gpuid - os.environ['THEANO_FLAGS'] = "mode=FAST_RUN,device=gpu%s,floatX=float32,force_device=True" % (gpuid) -else: - print "Using CPU." 
+ print 'Using GPU', gpuid + os.environ['THEANO_FLAGS'] = 'mode=FAST_RUN,device=gpu%s,floatX=float32,force_device=True' % (gpuid) + pass +else: print 'Using CPU.' -Train= not args.NoTrain +Train = not args.NoTrain -WriteResults= args.NoResults +WriteResults = args.NoResults diff --git a/CSVWriter.py b/CSVWriter.py index 681d389..8dd4b5d 100644 --- a/CSVWriter.py +++ b/CSVWriter.py @@ -1,27 +1,20 @@ -import numpy as np - -def CSVWriter(filename,X,Y,R): - - names=X.dtype.names - - colnames="" +#!/usr/bin/env python - for n in names: - colnames+=n+"/f:" +import numpy as np -# colnames+="TrueClass/f:" - for i in xrange(0,Y.shape[1]): - colnames+="true_"+str(i)+"/f:" +def CSVWriter (filename, X, Y, R, arrType): + names = X.dtype.names + colnames = [] - for i in xrange(0,R.shape[1]): - colnames+="predict_"+str(i)+"/f:" + for n in names: colnames.append(n) + for i in xrange(0, Y.shape[1]): colnames.append('true_' + str(i)) + for i in xrange(0, R.shape[1]): colnames.append('predict_' + str(i)) f = open(filename, 'w') - f.write(colnames[:-1]+"\n") - - X0=X.view(np.float32).reshape(X.shape + (-1,)) + f.write(','.join(colnames) + '\n') - YI=np.nonzero(Y)[1] - out=np.concatenate((X0,Y,R),axis=1) + X0 = X.view(arrType).reshape(X.shape + (-1,)) + out = np.concatenate((X0,Y,R), axis=1) - np.savetxt(f,out,delimiter=',') + np.savetxt(f, out, delimiter=',') + pass diff --git a/Classification.py b/Classification.py index b620365..07de08f 100644 --- a/Classification.py +++ b/Classification.py @@ -70,4 +70,4 @@ def Build(self): self.Model=model def Compile(self, Loss="categorical_crossentropy", Optimizer="rmsprop"): - self.Model.compile(loss=loss, optimizer=optimizer,metrics=["accuracy"]) + self.Model.compile(loss=Loss, optimizer=Optimizer,metrics=["accuracy"]) diff --git a/Experiment.py b/Experiment.py index 09f8ef9..b0f6673 100644 --- a/Experiment.py +++ b/Experiment.py @@ -1,74 +1,89 @@ -import sys,os,argparse +import datetime +a = datetime.datetime.now() +print a.ctime() -import 
h5py -import numpy as np +import os # Parse Arguments -execfile("EventClassificationDNN/Arguments.py") +execfile ('EventClassificationDNN/Arguments.py') # Now load the Hyperparameters -execfile(ConfigFile) +import numpy as np +execfile (ConfigFile) -if "Config" in dir(): +if 'Config' in dir(): for a in Config: - exec(a+"="+str(Config[a])) + if a != 'arrType': exec (a + '=' + str(Config[a])) + pass + pass # Load the Data +import h5py from EventClassificationDNN.MultiClassTools import * -from EventClassificationDNN.InputFiles import Samples - -(Train_X, Train_Y), (Test_X, Test_Y), ClassIndex=LoadData(Samples,.1,MaxEvents=MaxEvents) +(Train_X, Train_Y), (Test_X, Test_Y), ClassIndex = LoadData (Samples, .1, MaxEvents=MaxEvents) + # Select Variables To use in training # To get the field names, just look at Fields=Train_X.dtype.names -from EventClassificationDNN.InputVars import FieldGroups, SelectedFields # Keep the original data before renomalizing... will use this in output -Train_X0=Train_X.copy() -Test_X0=Test_X.copy() - -GroupMins=[0]*len(FieldGroups) -GroupMaxs=[0]*len(FieldGroups) - -# Normalize Ranges within variable groups e.g. masses, angles (phi, eta, cos separately) -for Fs in xrange(0,len(FieldGroups)): - Mins=[] - Maxs=[] - for varI in xrange(0,len(FieldGroups[Fs])): - Mins+=[np.min(Train_X0[FieldGroups[Fs][varI]])] - Maxs+=[np.max(Train_X0[FieldGroups[Fs][varI]])] - - GroupMins[Fs]=min(Mins) - GroupMaxs[Fs]=max(Maxs) - - for var in FieldGroups[Fs]: - yy=Train_X[var] - yy[:]= 1./(GroupMaxs[Fs]-GroupMins[Fs]) * (yy-GroupMins[Fs]) - - yy1=Test_X[var] - yy1[:]= 1./(GroupMaxs[Fs]-GroupMins[Fs])* (yy1-GroupMins[Fs]) - -Train_X_N=Train_X +Train_X0 = Train_X.copy() +Test_X0 = Test_X.copy() + +#### Shifting/Scaling normalization +print 'Normalizing ...' 
+for obs in Observables: + print ' ', obs + yy = Train_X[obs] + yy1 = Test_X[obs] + if Observables[obs].has_key('gaus'): + M = np.mean(Train_X[obs]) + V = np.var(Test_X[obs]) + yy[:] = (yy - M) / V + 0.5 + yy1[:] = (yy1 - M) / V + 0.5 + pass + else: + if Observables[obs].has_key('trim'): + minval = np.nanpercentile(Train_X[obs], Observables[obs]['trim'][0]) + maxval = np.nanpercentile(Train_X[obs], Observables[obs]['trim'][1]) + pass + elif Observables[obs].has_key('range'): + minval = Observables[obs]['range'][0] + maxval = Observables[obs]['range'][1] + pass + else: + minval = np.min(Train_X[obs]) + maxval = np.max(Train_X[obs]) + pass + yy[:] = 1./(maxval-minval) * (yy-minval) + yy1[:] = 1./(maxval-minval) * (yy1-minval) + pass + pass +pass + +#Train_X_N = Train_X # Keep Only selected Variables -Train_X=Train_X[SelectedFields[VarSet]] -Test_X=Test_X[SelectedFields[VarSet]] +Train_X = Train_X[SelectedFields[VarSet]] +Test_X = Test_X[SelectedFields[VarSet]] -Train_X_S=Train_X +#Train_X_S = Train_X # Now Lets Simplify the structure (Note this requires everything to be a float) -Train_X=Train_X.view(np.float32).reshape(Train_X.shape + (-1,)) -Test_X=Test_X.view(np.float32).reshape(Test_X.shape + (-1,)) +# If you get an error that the input size isn't right, try changing float below to float32 or float64 +Train_X = Train_X.view(Config['arrType']).reshape(Train_X.shape + (-1,)) +Test_X = Test_X.view(Config['arrType']).reshape(Test_X.shape + (-1,)) # Protect against divide by zero! 
-Train_X=np.nan_to_num(Train_X) -Test_X=np.nan_to_num(Test_X) +Train_X = np.nan_to_num(Train_X) +Test_X = np.nan_to_num(Test_X) # Get some Inof -N_Inputs=len(SelectedFields[VarSet]) -N_Classes=np.shape(Train_Y)[1] -print "N Inputs:",N_Inputs -print "N Classes:",N_Classes +N_Inputs = len(SelectedFields[VarSet]) +N_Classes = np.shape(Train_Y)[1] +print 'N Inputs:', N_Inputs +print 'Width:', Width +print 'N Classes:', N_Classes # Now Build the Model from DLTools.ModelWrapper import * @@ -77,48 +92,53 @@ from EventClassificationDNN.Classification import FullyConnectedClassification if LoadModel: - print "Loading Model From:",LoadModel - if LoadModel[-1]=="/": - LoadModel=LoadModel[:-1] - Name=os.path.basename(LoadModel) - MyModel=ModelWrapper(Name) - MyModel.InDir=LoadModel + print 'Loading Model From:', LoadModel + if LoadModel[-1]=='/': LoadModel = LoadModel[:-1] + Name = os.path.basename (LoadModel) + MyModel = ModelWrapper (Name) + MyModel.InDir = LoadModel MyModel.Load() + pass else: - MyModel=FullyConnectedClassification(Name,N_Inputs,Width,Depth,N_Classes,WeightInitialization) - MyModel.Build() + Name += '_%s' % VarSet + print 'Model Filename:', Name + MyModel = FullyConnectedClassification (Name, N_Inputs, Width, Depth, N_Classes, WeightInitialization) + MyModel.Build () + pass -MyModel.MetaData["Config"]=Config +MyModel.MetaData['Config'] = Config # Compile the Model -print "Compiling the Model... this will take a while." +print 'Compiling the Model... this will take a while.' -optimizer="sgd" -MyModel.Compile(Loss=loss, Optimizer=optimizer) +optimizer = 'sgd' +MyModel.Compile (Loss=loss, Optimizer=optimizer) -model=MyModel.Model +model = MyModel.Model # Print the summary -model.summary() +model.summary () if Train: - print "Training." - hist=MyModel.Train(Train_X, Train_Y, Epochs, BatchSize) - + print 'Training.' 
+ hist = MyModel.Train(Train_X, Train_Y, Epochs, BatchSize) score = model.evaluate(Test_X, Test_Y , batch_size=BatchSize) - - print "Final Score:",score - - MyModel.MetaData["FinalScore"]=score + print 'Final Score:',score + MyModel.MetaData['FinalScore'] = score + pass # Save -MyModel.Save() +MyModel.Save () # Analysis from EventClassificationDNN.Analysis import MultiClassificationAnalysis -result=MultiClassificationAnalysis(MyModel,Test_X,Test_Y,BatchSize ) +result = MultiClassificationAnalysis (MyModel, Test_X, Test_Y, BatchSize) # Dump out the predictions added to the input if WriteResults: - print "Writing Results." + print 'Writing Results.' from EventClassificationDNN.CSVWriter import * - CSVWriter(MyModel.OutDir+"/Result.csv",Test_X0,Test_Y,result) + CSVWriter (MyModel.OutDir+'/Result.csv', Test_X0, Test_Y, result, Config['arrType']) + pass + +a = datetime.datetime.now() +print a.ctime() diff --git a/InputFiles.py b/InputFiles.py deleted file mode 100644 index e9706c6..0000000 --- a/InputFiles.py +++ /dev/null @@ -1,37 +0,0 @@ - -# Define the Input Files -InputFiles=["mP1000_mC150_mX100.h5", - "mP1000_mC400_mX100.h5", - "mP1000_mC600_mX100.h5", - "mP1000_mC900_mX100.h5", - "mP1000_mC200_mX100.h5", - "mP1000_mC500_mX100.h5", - "mP1000_mC700_mX100.h5", - "mP1000_mC950_mX100.h5", - "mP1000_mC300_mX100.h5", - "mP1000_mC550_mX100.h5", - "mP1000_mC800_mX100.h5"] -Files=[] - -# Select datasets (formerly TTrees in original ROOT file) - - -for InputData in InputFiles: - InputData="/scratch/data-backup/afarbin/crogan/h5/"+InputData - Files+= [ - [InputData, "AA_Gen"], - [InputData, "AB_Gen"], - [InputData, "BA_Gen"], - [InputData, "BB_Gen"] - ] - -Samples=[] - -for F in Files: - if type(F) != str: - name=F[1] - F=F[0] - Samples.append([F,name]) - else: - name=F.split(".")[0].split("/")[-1] - Samples.append([F,name+"_SRAll"]) diff --git a/InputVars.py b/InputVars.py deleted file mode 100644 index 311e70c..0000000 --- a/InputVars.py +++ /dev/null @@ -1,23 +0,0 @@ -# 
Select Variables To use in training - -# On the JigSaw Dataset Fields will be the following: -# - -FieldGroups = [ - ['mP', 'mC', 'mX', 'METx', 'METy', 'L1_pT', 'L1_M', 'L2_pT','L2_M', 'B1_pT','B1_M', 'B2_pT','B2_M', 'MPP_AA', 'Eb_a_AA', 'Eb_b_AA', 'El_a_AA', 'El_b_AA', 'MPP_BB', 'Eb_a_BB', 'Eb_b_BB', 'El_a_BB', 'El_b_BB','MPP_AB', 'Eb_a_AB', 'Eb_b_AB', 'El_a_AB', 'El_b_AB', 'MPP_BA', 'Eb_a_BA', 'Eb_b_BA', 'El_a_BA', 'El_b_BA'], - ['L1_eta', 'L2_eta', 'B1_eta','B2_eta'], - ['L1_phi', 'L2_phi','B1_phi', 'B2_phi'], - ['cosPP_AA', 'cosPa_AA', 'cosPb_AA', 'cosCa_AA', 'cosCb_AA','cosPP_BB', 'cosPa_BB', 'cosPb_BB', 'cosCa_BB', 'cosCb_BB','cosPP_AB', 'cosPa_AB', 'cosPb_AB', 'cosCa_AB', 'cosCb_AB','cosPP_BA', 'cosPa_BA', 'cosPb_BA', 'cosCa_BA', 'cosCb_BA'], - ['dphi_PP_Pa_AA', 'dphi_PP_Pb_AA', 'dphi_Pa_Ca_AA', 'dphi_Pb_Cb_AA', 'dphi_PP_Pa_BB', 'dphi_PP_Pb_BB', 'dphi_Pa_Ca_BB', 'dphi_Pb_Cb_BB', 'dphi_PP_Pa_AB', 'dphi_PP_Pb_AB', 'dphi_Pa_Ca_AB', 'dphi_Pb_Cb_AB','dphi_PP_Pa_BA', 'dphi_PP_Pb_BA', 'dphi_Pa_Ca_BA', 'dphi_Pb_Cb_BA'] ] - -SelectedFields = [ - ['mP', 'mC', 'mX', 'L1_pT', 'L1_eta', 'L1_phi', 'L1_M', 'L2_pT', 'L2_eta', 'L2_phi', 'L2_M', 'B1_pT', 'B1_eta', 'B1_phi', 'B1_M', 'B2_pT', 'B2_eta', 'B2_phi', 'B2_M', 'MPP_AA', 'Eb_a_AA', 'Eb_b_AA', 'El_a_AA', 'El_b_AA', 'cosPP_AA', 'cosPa_AA', 'cosPb_AA', 'cosCa_AA', 'cosCb_AA', 'dphi_PP_Pa_AA', 'dphi_PP_Pb_AA', 'dphi_Pa_Ca_AA', 'dphi_Pb_Cb_AA', 'MPP_BB', 'Eb_a_BB', 'Eb_b_BB', 'El_a_BB', 'El_b_BB', 'cosPP_BB', 'cosPa_BB', 'cosPb_BB', 'cosCa_BB', 'cosCb_BB', 'dphi_PP_Pa_BB', 'dphi_PP_Pb_BB', 'dphi_Pa_Ca_BB', 'dphi_Pb_Cb_BB', 'MPP_AB', 'Eb_a_AB', 'Eb_b_AB', 'El_a_AB', 'El_b_AB', 'cosPP_AB', 'cosPa_AB', 'cosPb_AB', 'cosCa_AB', 'cosCb_AB', 'dphi_PP_Pa_AB', 'dphi_PP_Pb_AB', 'dphi_Pa_Ca_AB', 'dphi_Pb_Cb_AB', 'MPP_BA', 'Eb_a_BA', 'Eb_b_BA', 'El_a_BA', 'El_b_BA', 'cosPP_BA', 'cosPa_BA', 'cosPb_BA', 'cosCa_BA', 'cosCb_BA', 'dphi_PP_Pa_BA', 'dphi_PP_Pb_BA', 'dphi_Pa_Ca_BA', 'dphi_Pb_Cb_BA'], - - ['MPP_AA', 'Eb_a_AA', 'Eb_b_AA', 
'El_a_AA', 'El_b_AA', 'cosPP_AA', 'cosPa_AA', 'cosPb_AA', 'cosCa_AA', 'cosCb_AA', 'dphi_PP_Pa_AA', 'dphi_PP_Pb_AA', 'dphi_Pa_Ca_AA', 'dphi_Pb_Cb_AA', 'MPP_BB', 'Eb_a_BB', 'Eb_b_BB', 'El_a_BB', 'El_b_BB', 'cosPP_BB', 'cosPa_BB', 'cosPb_BB', 'cosCa_BB', 'cosCb_BB', 'dphi_PP_Pa_BB', 'dphi_PP_Pb_BB', 'dphi_Pa_Ca_BB', 'dphi_Pb_Cb_BB', 'MPP_AB', 'Eb_a_AB', 'Eb_b_AB', 'El_a_AB', 'El_b_AB', 'cosPP_AB', 'cosPa_AB', 'cosPb_AB', 'cosCa_AB', 'cosCb_AB', 'dphi_PP_Pa_AB', 'dphi_PP_Pb_AB', 'dphi_Pa_Ca_AB', 'dphi_Pb_Cb_AB', 'MPP_BA', 'Eb_a_BA', 'Eb_b_BA', 'El_a_BA', 'El_b_BA', 'cosPP_BA', 'cosPa_BA', 'cosPb_BA', 'cosCa_BA', 'cosCb_BA', 'dphi_PP_Pa_BA', 'dphi_PP_Pb_BA', 'dphi_Pa_Ca_BA', 'dphi_Pb_Cb_BA'], - - ['METx', 'METy', 'L1_pT', 'L1_eta', 'L1_phi', 'L2_pT', 'L2_eta', 'L2_phi', 'B1_pT', 'B1_eta', 'B1_phi', 'B2_pT', 'B2_eta', 'B2_phi'], - - ['MPP_AA', 'Eb_a_AA', 'Eb_b_AA', 'El_a_AA', 'El_b_AA', 'cosPP_AA', 'cosPa_AA', 'cosPb_AA', 'cosCa_AA', 'cosCb_AA', 'dphi_PP_Pa_AA', 'dphi_PP_Pb_AA', 'dphi_Pa_Ca_AA', 'dphi_Pb_Cb_AA'], - - ['mP', 'mC', 'mX','METx', 'METy', 'L1_pT', 'L1_eta', 'L1_phi', 'L2_pT', 'L2_eta', 'L2_phi', 'B1_pT', 'B1_eta', 'B1_phi', 'B2_pT', 'B2_eta', 'B2_phi'], -] diff --git a/SUSYConfig.py b/SUSYConfig.py new file mode 100644 index 0000000..9d11222 --- /dev/null +++ b/SUSYConfig.py @@ -0,0 +1,168 @@ +from DLTools.Permutator import * + +#Previously in InputFiles.py +# Define the Input Files +InputFiles = ['mP1000_mC150_mX100.h5', + 'mP1000_mC200_mX100.h5', + 'mP1000_mC300_mX100.h5', + 'mP1000_mC400_mX100.h5', + 'mP1000_mC500_mX100.h5', + 'mP1000_mC550_mX100.h5', + 'mP1000_mC600_mX100.h5', + 'mP1000_mC700_mX100.h5', + 'mP1000_mC800_mX100.h5', + 'mP1000_mC900_mX100.h5', + 'mP1000_mC950_mX100.h5'] + +# Select datasets (formerly TTrees in original ROOT file) +Samples = [] +for InputData in InputFiles: + InputData = os.getenv('SampleDirSUSY') + '/' + InputData + Samples += [ [InputData, 'AA_Gen'], + [InputData, 'AB_Gen'], + [InputData, 'BA_Gen'], + [InputData, 'BB_Gen'] ] 
+ pass + +#Previously in InputVars.py +# Select Variables To use in training + +# used for scaling +Observables = { + 'mP': {'range': [1000, 1001]}, + 'mC': {'range': [150, 950]}, + 'mX': {'range': [100, 101]}, + 'B1_pT': {'trim': [0, 99]}, + 'B1_phi': {'range': [-np.pi, np.pi]}, + 'B1_eta': {'range': [-2.5, 2.5]}, + 'B1_M': {'trim': [0, 99]}, + 'B2_pT': {'trim': [0, 99]}, + 'B2_phi': {'range': [-np.pi, np.pi]}, + 'B2_eta': {'range': [-2.5, 2.5]}, + 'B2_M': {'trim': [0, 99]}, + 'L1_pT': {'trim': [0, 99]}, + 'L1_phi': {'range': [-np.pi, np.pi]}, + 'L1_eta': {'range': [-2.5, 2.5]}, + 'L1_M': {'trim': [0, 99]}, + 'L2_pT': {'trim': [0, 99]}, + 'L2_phi': {'range': [-np.pi, np.pi]}, + 'L2_eta': {'range': [-2.5, 2.5]}, + 'L2_M': {'trim': [0, 99]}, + 'METx': {'trim': [0, 99]}, + 'METy': {'trim': [0, 99]}, + 'MPP_AA': {}, + 'Eb_a_AA': {}, + 'Eb_b_AA': {}, + 'El_a_AA': {}, + 'El_b_AA': {}, + 'MPP_BB': {}, + 'Eb_a_BB': {}, + 'Eb_b_BB': {}, + 'El_a_BB': {}, + 'El_b_BB': {}, + 'MPP_AB': {}, + 'Eb_a_AB': {}, + 'Eb_b_AB': {}, + 'El_a_AB': {}, + 'El_b_AB': {}, + 'MPP_BA': {}, + 'Eb_a_BA': {}, + 'Eb_b_BA': {}, + 'El_a_BA': {}, + 'El_b_BA': {}, + 'cosPP_AA': {'range': [-1, 1]}, + 'cosPP_AB': {'range': [-1, 1]}, + 'cosPP_BA': {'range': [-1, 1]}, + 'cosPP_BB': {'range': [-1, 1]}, + 'cosPa_AA': {'range': [-1, 1]}, + 'cosPa_AB': {'range': [-1, 1]}, + 'cosPa_BA': {'range': [-1, 1]}, + 'cosPa_BB': {'range': [-1, 1]}, + 'cosPb_AA': {'range': [-1, 1]}, + 'cosPb_AB': {'range': [-1, 1]}, + 'cosPb_BA': {'range': [-1, 1]}, + 'cosPb_BB': {'range': [-1, 1]}, + 'cosCa_AA': {'range': [-1, 1]}, + 'cosCa_AB': {'range': [-1, 1]}, + 'cosCa_BA': {'range': [-1, 1]}, + 'cosCa_BB': {'range': [-1, 1]}, + 'cosCb_AA': {'range': [-1, 1]}, + 'cosCb_AB': {'range': [-1, 1]}, + 'cosCb_BA': {'range': [-1, 1]}, + 'cosCb_BB': {'range': [-1, 1]}, + 'dphi_PP_Pa_AA': {'range': [0, 2.*np.pi]}, + 'dphi_PP_Pa_BB': {'range': [0, 2.*np.pi]}, + 'dphi_PP_Pa_AB': {'range': [0, 2.*np.pi]}, + 'dphi_PP_Pa_BA': {'range': [0, 
2.*np.pi]}, + 'dphi_PP_Pb_AA': {'range': [0, 2.*np.pi]}, + 'dphi_PP_Pb_BB': {'range': [0, 2.*np.pi]}, + 'dphi_PP_Pb_AB': {'range': [0, 2.*np.pi]}, + 'dphi_PP_Pb_BA': {'range': [0, 2.*np.pi]}, + 'dphi_Pa_Ca_AA': {'range': [0, 2.*np.pi]}, + 'dphi_Pa_Ca_BB': {'range': [0, 2.*np.pi]}, + 'dphi_Pa_Ca_AB': {'range': [0, 2.*np.pi]}, + 'dphi_Pa_Ca_BA': {'range': [0, 2.*np.pi]}, + 'dphi_Pb_Cb_AA': {'range': [0, 2.*np.pi]}, + 'dphi_Pb_Cb_BB': {'range': [0, 2.*np.pi]}, + 'dphi_Pb_Cb_AB': {'range': [0, 2.*np.pi]}, + 'dphi_Pb_Cb_BA': {'range': [0, 2.*np.pi]}, + } + +SelectedFields = [ + # detector reconstruction + ['METx', 'METy', + 'L1_pT', 'L1_eta', 'L1_phi', 'L1_M', + 'L2_pT', 'L2_eta', 'L2_phi', 'L2_M', + 'B1_pT', 'B1_eta', 'B1_phi', 'B1_M', + 'B2_pT', 'B2_eta', 'B2_phi', 'B2_M'], + + # recursive jigsaw reconstruction, just mass/energy + ['MPP_AA', 'Eb_a_AA', 'Eb_b_AA', 'El_a_AA', 'El_b_AA', + 'MPP_AB', 'Eb_a_AB', 'Eb_b_AB', 'El_a_AB', 'El_b_AB', + 'MPP_BA', 'Eb_a_BA', 'Eb_b_BA', 'El_a_BA', 'El_b_BA', + 'MPP_BB', 'Eb_a_BB', 'Eb_b_BB', 'El_a_BB', 'El_b_BB'], + + # single topologic reconstruction + ['MPP_AA', 'Eb_a_AA', 'Eb_b_AA', 'El_a_AA', 'El_b_AA', + 'cosPP_AA', 'cosPa_AA', 'cosPb_AA', 'cosCa_AA', 'cosCb_AA', + 'dphi_PP_Pa_AA', 'dphi_PP_Pb_AA', 'dphi_Pa_Ca_AA', 'dphi_Pb_Cb_AA'], +] + +Name = 'SUSYModel' + +Config = {'MaxEvents': 50000, + 'Epochs': 10000, + 'BatchSize': 2048*8, + 'LearningRate': 0.005, + 'Decay': 0., + 'Momentum': 0., + 'Nesterov': 0., + 'arrType': float, + 'WeightInitialization':"'normal'"} + +Params = {'Width': [1585], + 'Depth': [1], + 'loss': ["'categorical_crossentropy'"]} + +PS = Permutator (Params) +Combos = PS.Permutations () + +print 'HyperParameter Scan:', len(Combos), 'possible combinations.' 
+ +if 'HyperParamSet' in dir(): i = int(HyperParamSet) +else: + # Set Seed based on time + random.seed() + i = int(round( len(Combos)*random.random() )) + print 'Randomly picking HyperParameter Set' + pass + +if i < 0: + print 'SetList:' + for j in xrange(len(Combos)): print j, ':', Combos[j] + quit() + pass + +print 'Picked combination:', i + +for k in Combos[i]: Config[k] = Combos[i][k] diff --git a/ScanConfig.py b/ScanConfig.py deleted file mode 100644 index 1a8731a..0000000 --- a/ScanConfig.py +++ /dev/null @@ -1,62 +0,0 @@ -import random -import getopt -from DLTools.Permutator import * -import sys,argparse - -Name="EventClassificationDNN" - -Config={ - - "MaxEvents":50000, - "Epochs":1000, - "BatchSize":2048*8, - - "LearningRate":0.005, - - "Decay":0., - "Momentum":0., - "Nesterov":0., - - "WeightInitialization":"'normal'" -} - -Params={ "Width":[128], - "Depth":[2], - "loss":[#"'mean_squared_error'", - '"categorical_crossentropy"'], - } - -PS=Permutator(Params) -Combos=PS.Permutations() - -print "HyperParameter Scan: ", len(Combos), "possible combiniations." 
-
-if "HyperParamSet" in dir():
-    i=int(HyperParamSet)
-else:
-    # Set Seed based on time
-    random.seed()
-    i=int(round(len(Combos)*random.random()))
-    print "Randomly picking HyperParameter Set"
-
-
-if i<0:
-    print "SetList:"
-    for j in xrange(0,len(Combos)):
-        print j,":",Combos[j]
-
-    quit()
-
-
-print "Picked combination: ",i
-
-for k in Combos[i]:
-    Config[k]=Combos[i][k]
-
-for MetaData in Params.keys():
-    val=str(Config[MetaData]).replace('"',"")
-    Name+="_"+val.replace("'","")
-
-print "Model Filename: ",Name
-
-
diff --git a/ZllConfig.py b/ZllConfig.py
new file mode 100644
index 0000000..0915acc
--- /dev/null
+++ b/ZllConfig.py
@@ -0,0 +1,76 @@
+from DLTools.Permutator import *
+
+#Previously in InputFiles.py
+# Define the Input Files
+InputFiles = ['Zll.h5']
+
+# Select datasets (formerly TTrees in original ROOT file)
+Samples = []
+for InputData in InputFiles:
+    InputData = os.getenv('SampleDirZll') + '/' + InputData
+    Samples += [ [InputData, 'Zto2LOS'],
+                 [InputData, 'Rndm2LOS'] ]
+    pass
+
+#Previously in InputVars.py
+# Select Variables To use in training
+Observables = {
+    'LP_pT': {'trim': [0, 99]},
+    'LP_phi': {'range': [-np.pi, np.pi]},
+    'LP_eta': {'range': [-2.5, 2.5]},
+    'LP_E': {'trim': [0, 99]},
+
+    'LM_pT': {'trim': [0, 99]},
+    'LM_phi': {'range': [-np.pi, np.pi]},
+    'LM_eta': {'range': [-2.5, 2.5]},
+    'LM_E': {'trim': [0, 99]},
+}
+
+SelectedFields = [
+    # all variables
+    ['LP_pT', 'LP_eta', 'LP_phi', 'LP_E',
+     'LM_pT', 'LM_eta', 'LM_phi', 'LM_E'],
+
+    # known differences
+    ['LP_phi', 'LM_phi'],
+]
+
+Name = 'ZllModel'
+
+Config = {'MaxEvents': 50000,
+          'Epochs': 5000,
+          'BatchSize': 2048*8,
+          'LearningRate': 0.005,
+          'Decay': 0.,
+          'Momentum': 0.,
+          'Nesterov': 0.,
+          'arrType': np.float32,
+          'WeightInitialization':"'normal'"}
+
+Params = {'Width': [1585, 100],
+          'Depth': [1],
+          'loss': ["'categorical_crossentropy'"]}
+
+
+PS = Permutator (Params)
+Combos = PS.Permutations ()
+
+print 'HyperParameter Scan:', len(Combos), 'possible
combinations.' + +if 'HyperParamSet' in dir(): i = int(HyperParamSet) +else: + # Set Seed based on time + random.seed() + i = int(round( len(Combos)*random.random() )) + print 'Randomly picking HyperParameter Set' + pass + +if i < 0: + print 'SetList:' + for j in xrange(len(Combos)): print j, ':', Combos[j] + quit() + pass + +print 'Picked combination:', i + +for k in Combos[i]: Config[k] = Combos[i][k]