Commit ef83471: Initial commit

Author: usr. 0 parents. 18 files changed, +41563 -0 lines.

README.md

# Provably Improving Expert Predictions with Conformal Prediction

## Install dependencies

Experiments were run on Python 3.7.3 with a GPU. To install the required libraries, set the torch version in `requirements.txt` according to the available device (CPU or GPU) and run:

`pip install -r requirements.txt`

## Running experiments

For synthetic experiments, run:

`python3 ./run_conf_synthetic.py --n_labels <n> --cal_split <split> --runs <runs>`

where:

* <*n*\> is the number of labels, i.e. $n$.
* <*split*\> is the calibration and estimation split, i.e. $\frac{m}{data\_to\_split}$.
* <*runs*\> is the number of times each experiment is run, with a different random split of the above size each time.

**Note:** the above runs experiments for $\mathbb{P}[\hat Y = Y | \mathcal{Y}]\in\{0.3,0.5,0.7,0.9\}$ and for classifier accuracies also in $\{0.3,0.5,0.7,0.9\}$.
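For instance, with 10 labels, half of the data used for calibration and estimation, and 5 random splits (the values here are illustrative; `--n_labels 10` and `--runs 5` are also the defaults in `config.py`):

`python3 ./run_conf_synthetic.py --n_labels 10 --cal_split 0.5 --runs 5`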
For real-data experiments, run:

`python3 ./run_conf_real.py --cal_split <split> --runs <runs>`

where <*split*\> and <*runs*\> are the same as above.

**Note:** the above runs experiments for all classifiers in the paper.

## Results

* All plots are produced in `plots.ipynb`.
* For the tables we used the functions `print_accuracy_synthetic()` and `print_accuracy_tables_real()` in `plot/plot.py`, for the synthetic and real-data results respectively.
* For results on the relative gain in success probability $\mathbb{P}[\hat{Y}= Y| \mathcal{C}_{\hat{\alpha}}(X)]$ with respect to $\mathbb{P}[\hat{Y}= Y| \mathcal{Y}]$, we used `get_mn()` for the synthetic experiments and `get_m_real()` for the real-data experiments, both in `plot/plot.py`.

config.py

```python
import numpy as np
import torch
import os
import argparse


class Config:
    def __init__(self) -> None:
        pass


parser = argparse.ArgumentParser()
parser.add_argument("--n_labels", type=int, default=10)
parser.add_argument("--cal_split", type=float)
parser.add_argument("--runs", type=int, default=5)
args = parser.parse_args()
conf = Config()

conf.ROOT_DIR = os.path.dirname(__file__)

if torch.cuda.is_available():
    conf.device = torch.cuda.current_device()
else:
    conf.device = 'cpu'

conf.seed = 12345678

conf.torch_rng = torch.Generator(device=conf.device)
conf.torch_rng.manual_seed(conf.seed)
conf.rng = np.random.default_rng(seed=conf.seed)
conf.data_size = 10000
# Parameter that controls the difficulty of the synthetic dataset,
# keyed by the number of labels and then by target accuracy.
conf.class_sep = {10: {0.3: 0.46, 0.5: 1.09, 0.7: 1.72, 0.9: 2.75},
                  50: {0.3: 1.31, 0.5: 2.16, 0.7: 3.19, 0.9: 5.27},
                  100: {0.3: 1.75, 0.5: 2.8, 0.7: 4.4, 0.9: 7.7}}

conf.accuracies = np.arange(3, 10, 2) / 10.
conf.is_oblivious = False

conf.n_labels = args.n_labels
conf.cal_split = args.cal_split

conf.test_split = 0.2  # synthetic test split
conf.n_runs_per_split = args.runs
conf.delta = 0.1
# Synthetic label distribution: a Dirichlet draw, nudged to sum to exactly 1.
distr = conf.rng.dirichlet(np.ones(conf.n_labels), size=1)
sum_distr = distr.sum()
if sum_distr < 1.:
    distr += (1 - sum_distr) / conf.n_labels
conf.class_probabilities = distr

conf.model_names = ['densenet-bc-L190-k40', 'preresnet-110', 'resnet-110']
```
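Importing `config` parses the command-line flags once and exposes a single shared `conf` object. A minimal sketch of how downstream code reads it (the attributes printed here are just examples):

```python
# Minimal sketch: every module that imports config sees the same
# parsed, seeded configuration object.
from config import conf

print(conf.n_labels)                   # from --n_labels (default 10)
print(conf.delta)                      # 0.1
print(conf.class_probabilities.shape)  # (1, n_labels): one Dirichlet draw
```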

conformal_prediction.py

```python
from config import conf
import numpy as np
import torch
import torch.nn.functional as F


class ConformalPrediction:
    """Base interface: calibration/estimation data, a classifier, and delta."""

    def __init__(self, X_cal, y_cal, X_est, y_est, model, delta) -> None:
        self.model = model
        self.X_cal = X_cal
        self.y_cal = y_cal
        self.X_est = X_est
        self.y_est = y_est
        self.calibration_size = len(y_cal)
        self.delta = delta

    def find_all_alpha_values(self):
        pass

    def prediction_sets(self):
        pass

    def find_a_star(self):
        pass


class StandardCPgpu(ConformalPrediction):
    # TODO: rename the class; the name is misleading. It works with or
    # without a GPU and implements the system with both standard and
    # modified conformal prediction.
    """Implementation of the functions of our system."""

    def __init__(self, X_cal, y_cal, X_est, y_est, model, delta) -> None:
        super().__init__(X_cal, y_cal, X_est, y_est, model, delta)

    def epsilon_fn(self, k_a, delta_n_alphas):
        """Estimation error."""
        delta_n_alphas_t = torch.tensor(delta_n_alphas)
        epsilon = torch.sqrt(torch.log(delta_n_alphas_t) / (2 * k_a))
        return epsilon
```
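`epsilon_fn` computes what appears to be a Hoeffding-style estimation error with a union bound over the candidate $\alpha$ values: with $k_\alpha$ estimation samples whose prediction set contains the true label, and `delta_n_alphas` equal to $|A|/\delta$ (or $(m^2 n^2)/\delta$ when searching over all $(\alpha_1, \alpha_2)$ pairs, where $m$ is the calibration size and $n$ the number of labels),

$$\epsilon(\alpha) = \sqrt{\frac{\log\left(|A|/\delta\right)}{2\,k_\alpha}}.$$

The file continues with the construction of the candidate $\alpha$ grid and the search for $\hat\alpha$: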
```python
    def find_all_alpha_values(self):
        """Returns all 0 < alpha < 1 values that can be considered."""
        # Conformal scores of the true labels in the calibration set.
        model_out = self.model.predict_prob(self.X_cal)
        one_hot = np.eye(conf.n_labels)[self.y_cal]
        true_label_logits = model_out * one_hot

        conf_scores = sorted(1 - true_label_logits[true_label_logits > 0])
        self.conf_scores_t = torch.tensor(conf_scores, device=conf.device)
        # Scores of all predicted labels for each sample in the calibration set.
        logits = self.model.predict_prob(self.X_cal)
        logits_scores = 1 - logits

        # All coverages that result in different sets for each sample in the
        # calibration set.
        one_minus_alphas = np.searchsorted(conf_scores, logits_scores, side='left') / self.calibration_size

        alphas = 1 - one_minus_alphas[one_minus_alphas < 1]
        alphas = alphas[(1 - alphas) > 1 / self.calibration_size]
        self.alphas = alphas
        self.n_alphas = conf.n_labels * self.calibration_size
        return alphas

    def find_a_star(self, w_matrix, a1_star_idx=None, all_a1_a2=False):
        # TODO: rename; a_star stands for \hat{alpha}.
        """Returns \hat{alpha}."""
        a_star_idx = -1
        curr_criterion = 0
        alphas1 = self.alphas.flatten()
        qhat_a1 = torch.zeros((1, 1), device=conf.device)
        # Alphas and quantiles for the shifted quantile method given alpha_1.
        if a1_star_idx is not None:
            quant_a1 = np.ceil((1 - alphas1[a1_star_idx]) * (self.calibration_size + 1)) / self.calibration_size
            qhat_a1 = torch.quantile(self.conf_scores_t, quant_a1)
            alphas = self.alphas[self.alphas > self.alphas[a1_star_idx]]
        else:
            alphas = self.alphas

        # All quantiles, one per candidate alpha value.
        quant_unique = (np.ceil((1 - alphas) * (self.calibration_size + 1)) / self.calibration_size).flatten()
        self.epsilon = np.zeros(quant_unique.shape)

        # Output scores for each sample in the estimation set.
        output_scores = 1 - self.model.predict_prob(self.X_est)

        # Move data to the GPU if available.
        quants_t = torch.tensor(quant_unique, device=conf.device)

        qhats_t = torch.quantile(self.conf_scores_t, quants_t, keepdim=True)
        qhats_t = qhats_t.unsqueeze(1)
        y_est_t = torch.tensor(self.y_est, device=conf.device, dtype=torch.int64)
        fill_value_t = torch.tensor(0, dtype=torch.double, device=conf.device)
        output_scores_t = torch.tensor(output_scores, device=conf.device)
        ws_t = torch.tensor(w_matrix[self.y_est], device=conf.device)

        for i, q in enumerate(qhats_t):
            qhats = q.expand(self.calibration_size, conf.n_labels)

            # sets[sample][label] is 1 for the labels in the prediction set of
            # each sample; sets for the shifted quantile method given alpha_1.
            if a1_star_idx is not None:
                qhats_a1 = qhat_a1.expand(self.calibration_size, conf.n_labels)
                sets_upper = torch.where(output_scores_t <= qhats_a1, 1, 0)
                sets_lower = torch.where(qhats <= output_scores_t, 1, 0)
                sets = sets_upper * sets_lower
            else:
                sets = torch.where(output_scores_t <= qhats, 1, 0)
            sets_exp_ws = sets * torch.exp(ws_t)

            # Denominators for all P[\hat Y = Y | C_alpha(X), Y \in C_alpha(X), Y = y].
            denominators = torch.sum(sets_exp_ws, axis=1)
            one_hot_yest = F.one_hot(y_est_t)
            # Mask for prediction sets that include the true label.
            mask = sets * one_hot_yest
            true_label_in_sets_idx = torch.sum(mask, axis=1)

            # Numerators for all P[\hat Y = Y | C_alpha(X), Y \in C_alpha(X), Y = y].
            numerators = torch.sum(sets_exp_ws * one_hot_yest, axis=1)

            # Apply the mask so that Y \in C_alpha(X) is satisfied.
            masked_prob = torch.where(true_label_in_sets_idx == 1, numerators / denominators, fill_value_t)
            # Number of sets for which Y \in C_alpha(X) is satisfied.
            k_a = true_label_in_sets_idx.sum()
            # Non-empty sets and alpha_star > 0.
            if k_a > 0:
                expected_correct_prob = masked_prob.sum() / k_a
                delta_n_alphas = (alphas.shape[0] / self.delta) if not all_a1_a2 else ((self.calibration_size ** 2) * (conf.n_labels ** 2)) / self.delta
                epsilon = self.epsilon_fn(k_a, delta_n_alphas)
                self.epsilon[i] = epsilon

                # `is None` rather than truthiness, so that index 0 is handled correctly.
                coverage = 1 - alphas[i] if a1_star_idx is None else (alphas[i] - alphas1[a1_star_idx] - (1 / (self.calibration_size + 1)))
                criterion = coverage * (expected_correct_prob - epsilon)
                if criterion > curr_criterion:
                    a_star_idx = i
                    curr_criterion = criterion
        if all_a1_a2:
            return a_star_idx, curr_criterion

        return a_star_idx
```
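The last two methods evaluate a fixed $\alpha$ (or an $\alpha_2$ given $\alpha_1$) on a held-out test set. Both simulate the expert by sampling a label from each prediction set with probability proportional to $e^{w}$ (the rows of the confusion-matrix-like tensor `cm`); a sample whose prediction set is empty is counted as an error: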
```python
    def error_given_test_set_per_a(self, X_test, y_test, w_matrix, alphas, a_star_idx=None, a2_star_idx=None):
        """Misprediction probability for each value of alpha, or of alpha_2 given alpha_1."""
        test_size = len(X_test)
        output_scores = 1 - self.model.predict_prob(X_test)

        # Alphas and quantiles for the shifted quantile method.
        if a_star_idx is not None:
            quant_a1 = np.ceil((1 - self.alphas[a_star_idx]) * (self.calibration_size + 1)) / self.calibration_size
            qhat_a1 = torch.quantile(self.conf_scores_t, quant_a1)
            alphas = np.array([alphas]) if a2_star_idx is not None else self.alphas[self.alphas > self.alphas[a_star_idx]]

        qhats_unique = np.ceil((1 - alphas) * (self.calibration_size + 1)) / self.calibration_size
        error_rate_per_a = torch.zeros((len(qhats_unique),), device=conf.device)

        # Move data to the GPU if available.
        qhats_t = torch.tensor(qhats_unique, device=conf.device).unsqueeze(1)
        y_test_t = torch.tensor(y_test, device=conf.device, dtype=torch.int64)
        output_scores_t = torch.tensor(output_scores, device=conf.device)
        ws_t = torch.tensor(w_matrix[y_test], device=conf.device)
        a_empty_sets = 0
        fill_value_t = torch.exp(ws_t) / (torch.exp(ws_t).sum(axis=1).unsqueeze(1).expand(-1, conf.n_labels))

        for i, q in enumerate(qhats_t):
            qhats = q.expand(test_size, conf.n_labels)
            # sets[sample][label] is 1 for the labels in the prediction set of
            # each sample; sets for the shifted quantile method given alpha_1.
            if a_star_idx is not None:
                qhats_a1 = qhat_a1.expand(test_size, conf.n_labels)
                sets_upper = torch.where(output_scores_t <= qhats_a1, 1, 0)
                sets_lower = torch.where(qhats <= output_scores_t, 1, 0)
                sets = sets_upper * sets_lower
            else:
                sets = torch.where(output_scores_t <= qhats, 1, 0)
            non_empty_sets = sets.sum(axis=1).count_nonzero()

            if non_empty_sets == 0:
                a_empty_sets += 1

            # Denominators for P[\hat Y = y | C_alpha(X), y \in C_alpha(X)].
            sets_exp_ws = sets * torch.exp(ws_t)
            denominators_col = torch.sum(sets_exp_ws, axis=1)
            denominators = denominators_col.unsqueeze(1).expand(-1, conf.n_labels)

            # Numerators for P[\hat Y = y | C_alpha(X), y \in C_alpha(X)].
            numerators = sets_exp_ws

            # Confusion matrix for each prediction set.
            cm = torch.where(denominators > 0, numerators / denominators, fill_value_t)

            # Human prediction sampled from the prediction sets.
            y_h = cm.multinomial(num_samples=1, replacement=True, generator=conf.torch_rng).squeeze()

            # Empty sets count as errors.
            y_hats = torch.where(denominators_col > 0, y_h, -1)
            errors = (y_hats != y_test_t).count_nonzero().double()
            error_rate_per_a[i] = errors / test_size

        return error_rate_per_a

    def size_given_test_set_per_a(self, X_test, y_test, w_matrix, alphas, a_star_idx=None, a2_star_idx=None):
        """Average set size for each value of alpha, or of alpha_2 given alpha_1."""
        test_size = len(X_test)
        output_scores = 1 - self.model.predict_prob(X_test)

        # Alphas and quantiles for the shifted quantile method.
        if a_star_idx is not None:
            quant_a1 = np.ceil((1 - self.alphas[a_star_idx]) * (self.calibration_size + 1)) / self.calibration_size
            qhat_a1 = torch.quantile(self.conf_scores_t, quant_a1)
            alphas = np.array([alphas]) if a2_star_idx is not None else self.alphas[self.alphas > self.alphas[a_star_idx]]

        qhats_unique = np.ceil((1 - alphas) * (self.calibration_size + 1)) / self.calibration_size
        set_size_per_a = torch.zeros((len(qhats_unique),), device=conf.device)

        # Move data to the GPU if available.
        qhats_t = torch.tensor(qhats_unique, device=conf.device).unsqueeze(1)
        output_scores_t = torch.tensor(output_scores, device=conf.device)
        ws_t = torch.tensor(w_matrix[y_test], device=conf.device)

        for i, q in enumerate(qhats_t):
            qhats = q.expand(test_size, conf.n_labels)
            # sets[sample][label] is 1 for the labels in the prediction set of
            # each sample; sets for the shifted quantile method.
            if a_star_idx is not None:
                qhats_a1 = qhat_a1.expand(test_size, conf.n_labels)
                sets_upper = torch.where(output_scores_t <= qhats_a1, 1, 0)
                sets_lower = torch.where(qhats <= output_scores_t, 1, 0)
                sets = sets_upper * sets_lower
            else:
                sets = torch.where(output_scores_t <= qhats, 1, 0)
            size_per_set = sets.sum(axis=1)
            set_size_per_a[i] = size_per_set.sum() / size_per_set.numel()

        return set_size_per_a
```
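Stripped of the expert weighting and the search over $\alpha$, the quantile step these methods share is the standard split conformal recipe. A minimal self-contained sketch with synthetic numbers (not the authors' pipeline; all names and values here are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
n_cal, n_labels, alpha = 500, 10, 0.1

# Stand-in for model.predict_prob on the calibration set: rows sum to 1.
probs = rng.dirichlet(np.ones(n_labels), size=n_cal)
y_cal = rng.integers(0, n_labels, size=n_cal)

# Conformal score of the true label: 1 - predicted probability.
scores = 1 - probs[np.arange(n_cal), y_cal]

# Conservative empirical quantile, as in find_a_star:
# ceil((1 - alpha) * (n + 1)) / n, clipped to 1.
q_level = min(np.ceil((1 - alpha) * (n_cal + 1)) / n_cal, 1.0)
qhat = np.quantile(scores, q_level)

# Prediction set for a new sample: every label whose score is <= qhat.
p_new = rng.dirichlet(np.ones(n_labels))
pred_set = np.where(1 - p_new <= qhat)[0]
print(pred_set)  # contains the true label w.p. >= 1 - alpha over the draw
```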
