diff --git a/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc
index 1f2d7037..46234b46 100644
Binary files a/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc b/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc
index 2af44232..0d382fd3 100644
Binary files a/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc and b/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/configs/maskformer.py b/models/official/projects/maskformer/configs/maskformer.py
index 51a3cae4..fb5d3273 100644
--- a/models/official/projects/maskformer/configs/maskformer.py
+++ b/models/official/projects/maskformer/configs/maskformer.py
@@ -114,6 +114,10 @@ class MaskFormerTask(cfg.TaskConfig):
 SET_MODEL_BFLOAT16 = False
 SET_DATA_BFLOAT16 = True
 
+if not os.environ.get('USE_BFLOAT16_DATA'): 
+  SET_DATA_BFLOAT16 = False
+  
+
 @exp_factory.register_config_factory('maskformer_coco_panoptic')
 def maskformer_coco_panoptic() -> cfg.ExperimentConfig:
   """Config to get results that matches the paper."""
@@ -124,9 +128,22 @@ def maskformer_coco_panoptic() -> cfg.ExperimentConfig:
   ckpt_interval = (COCO_TRAIN_EXAMPLES // train_batch_size) * 10 # Don't write ckpts frequently. Slows down the training
   image_size = int(os.environ.get('IMG_SIZE'))
 
-  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
-  train_steps = 300 * steps_per_epoch  # 300 epochs
-  decay_at = train_steps - 100 * steps_per_epoch  # 200 epochs
+  if os.environ.get('STEPS_PER_EPOCH'): 
+    steps_per_epoch = int(os.environ.get('STEPS_PER_EPOCH'))
+  else: 
+    steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
+
+  if os.environ.get('NUM_EPOCH'):
+    train_steps = int(os.environ.get('NUM_EPOCH')) * steps_per_epoch
+    decay_at = int(2/3 * train_steps)  
+  else: 
+    train_steps = 300 * steps_per_epoch  # 300 epochs
+    decay_at = train_steps - 100 * steps_per_epoch  # 200 epochs
+
+  # steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
+  # train_steps = 300 * steps_per_epoch  # 300 epochs
+  # decay_at = train_steps - 100 * steps_per_epoch  # 200 epochs
+
   config = cfg.ExperimentConfig(
   task = MaskFormerTask(
           init_checkpoint="",
@@ -179,7 +196,7 @@ def maskformer_coco_panoptic() -> cfg.ExperimentConfig:
           )),
       trainer=cfg.TrainerConfig(
           train_steps=train_steps,
-          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
+          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size if not os.environ.get('VAL_STEPS') else int(os.environ.get('VAL_STEPS')),
           steps_per_loop=steps_per_epoch,
           summary_interval=steps_per_epoch,
           checkpoint_interval=steps_per_epoch,
diff --git a/models/official/projects/maskformer/eval_cpu.sh b/models/official/projects/maskformer/eval_cpu.sh
index 67bf03ed..ff09a729 100644
--- a/models/official/projects/maskformer/eval_cpu.sh
+++ b/models/official/projects/maskformer/eval_cpu.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 train_bsize=1
 eval_bsize=1
-export PYTHONPATH=$PYTHONPATH:~/tf-maskformer/models
-export MODEL_DIR="gs://cam2-models/maskformer_vishal_exps/EXP20_v8_eval"
-export MASKFORMER_CKPT="gs://cam2-models/maskformer_vishal_exps/EXP20_v8/ckpt-18480"
+export PYTHONPATH=$PYTHONPATH:/depot/davisjam/data/akshath/MaskFormer_tf/tf-maskformer/models
+export MODEL_DIR="gs://cam2-models/maskformer_vishal_exps/EXP26_v8_eval"
+export MASKFORMER_CKPT="gs://cam2-models/maskformer_vishal_exps/EXP26_v8/ckpt-482328"
 export RESNET_CKPT="gs://cam2-models/maskformer_vishal_exps/resnet50_pretrained/tfmg/ckpt-62400"
 export TFRECORDS_DIR="gs://cam2-datasets/coco_panoptic/tfrecords"
 export TRAIN_BATCH_SIZE=$train_bsize
@@ -16,7 +16,7 @@ export OVERRIDES="runtime.distribution_strategy=one_device,runtime.mixed_precisi
 task.validation_data.global_batch_size=$EVAL_BATCH_SIZE,task.model.which_pixel_decoder=transformer_fpn,\
 task.init_checkpoint_modules=all,\
 task.init_checkpoint=$MASKFORMER_CKPT"
-python3 models/official/projects/maskformer/train.py \
+python3 train.py \
   --experiment maskformer_coco_panoptic \
   --mode eval \
   --model_dir $MODEL_DIR \
diff --git a/models/official/projects/maskformer/eval_gpu.sh b/models/official/projects/maskformer/eval_gpu.sh
index 3def5bf4..dbe3d662 100755
--- a/models/official/projects/maskformer/eval_gpu.sh
+++ b/models/official/projects/maskformer/eval_gpu.sh
@@ -16,7 +16,7 @@ task.validation_data.global_batch_size=$EVAL_BATCH_SIZE,\
 task.model.which_pixel_decoder=transformer_fpn,\
 task.init_checkpoint_modules=all,\
 task.init_checkpoint=$MASKFORMER_CKPT"
-python3 models/official/projects/maskformer/train.py \
+python3 train.py \
   --experiment maskformer_coco_panoptic \
   --mode eval \
   --model_dir $MODEL_DIR \
diff --git a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc
index bf307409..b47ab470 100644
Binary files a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc
index 9860ab02..56dbe7b1 100644
Binary files a/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc
index 04b0d559..63987d74 100644
Binary files a/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/losses/maskformer_losses.py b/models/official/projects/maskformer/losses/maskformer_losses.py
index 8193422e..3dc470b6 100644
--- a/models/official/projects/maskformer/losses/maskformer_losses.py
+++ b/models/official/projects/maskformer/losses/maskformer_losses.py
@@ -54,6 +54,7 @@ def batch(self, y_true, y_pred):
         loss = tf.einsum("bnc,bmc->bnm",focal_pos,y_true) + tf.einsum(
         "bnc,bmc->bnm", focal_neg,(1 - y_true)
         )
+
         return loss/hw
     
 
@@ -88,7 +89,7 @@ def batch(self, y_true, y_pred):
         return loss
 
 class Loss:
-    def __init__(self, num_classes, matcher, eos_coef, cost_class = 1.0, cost_focal = 20.0, cost_dice = 1.0, ignore_label =0):
+    def __init__(self, num_classes, matcher, eos_coef, cost_class = 1.0, cost_focal = 1.0, cost_dice = 1.0, ignore_label =0):
        
         self.num_classes = num_classes
         self.matcher = matcher
@@ -120,13 +121,13 @@ def memory_efficient_matcher(self, outputs, y_true):
         tgt_mask_permuted = tf.reshape(tgt_mask_permuted, [tf.shape(tgt_mask_permuted)[0],tf.shape(tgt_mask_permuted)[1], -1]) # [b, 100, h*w]
         
         cost_focal = FocalLossMod().batch(tgt_mask_permuted, out_mask)
-        cost_dice = DiceLoss().batch(tgt_mask_permuted, out_mask)
+        cost_dice =  DiceLoss().batch(tgt_mask_permuted, out_mask)
        
         
         total_cost = (
-                self.cost_focal * cost_focal
-                + self.cost_class * cost_class
-                + self.cost_dice * cost_dice
+                self.cost_focal * cost_focal 
+                + self.cost_class * cost_class 
+                + self.cost_dice * cost_dice 
             )
         
         max_cost = (
@@ -135,14 +136,26 @@ def memory_efficient_matcher(self, outputs, y_true):
                     self.cost_dice * 0.0
                     )
 
-        
+        # print('cost_focal')
+        # print(cost_focal, cost_class, cost_dice)
+        # print('total_ocst')
+        # print(total_cost)
+        # print('max_cost')
+        # print(max_cost)
+
         # Append highest cost where there are no objects : No object class == 0 (self.ignore_label)
         valid = tf.expand_dims(tf.cast(tf.not_equal(tgt_ids, self.ignore_label), dtype=total_cost.dtype), axis=1)
+        # print('max_cost - ', max_cost)
+        # print('total_cost before - ', total_cost)
+
         total_cost = (1 - valid) * max_cost + valid * total_cost
+        # print('total_cost after - ', total_cost)
+
         total_cost = tf.where(
         tf.logical_or(tf.math.is_nan(total_cost), tf.math.is_inf(total_cost)),
         max_cost * tf.ones_like(total_cost, dtype=total_cost.dtype),
         total_cost)
+ 
        
         _, inds = matchers.hungarian_matching(total_cost)
         indices = tf.stop_gradient(inds)
@@ -168,10 +181,10 @@ def get_loss(self, outputs, y_true, indices):
         num_masks = tf.reduce_sum(tf.cast(tf.logical_not(background), tf.float32), axis=-1)
         
         xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_classes, logits=cls_assigned)
-        
+
         cls_loss =  tf.where(background, self.eos_coef * xentropy, xentropy)
-        
         cls_weights = tf.where(background, self.eos_coef * tf.ones_like(cls_loss), tf.ones_like(cls_loss))
+        # print('Weights: ', cls_weights)
         
         num_masks_per_replica = tf.reduce_sum(num_masks)
         
@@ -181,7 +194,10 @@ def get_loss(self, outputs, y_true, indices):
         num_masks_sum, cls_weights_sum = replica_context.all_reduce(tf.distribute.ReduceOp.SUM,[num_masks_per_replica, cls_weights_per_replica])
         
         # Final losses
+        # print('Losses: ', cls_loss)
+
         cls_loss = tf.math.divide_no_nan(tf.reduce_sum(cls_loss), cls_weights_sum)
+        # print('Final loss given for changing the tvars - ', cls_loss)
         
         out_mask = mask_assigned
         tgt_mask = individual_masks
@@ -206,10 +222,12 @@ def get_loss(self, outputs, y_true, indices):
         focal_loss = FocalLossMod(alpha=0.25, gamma=2)(tgt_mask, out_mask)
         focal_loss_weighted = tf.where(background, tf.zeros_like(focal_loss), focal_loss)
         focal_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(tf.math.reduce_sum(focal_loss_weighted, axis=-1)), num_masks_sum)
-            
+        # print(focal_loss_weighted)
         dice_loss = DiceLoss()(tgt_mask, out_mask)
         dice_loss_weighted = tf.where(background, tf.zeros_like(dice_loss), dice_loss)
         dice_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(tf.math.reduce_sum(dice_loss_weighted, axis=-1)), num_masks_sum)
+        # print(dice_loss_weighted)
+        # raise ValueError('2')
         
         return cls_loss, focal_loss_final, dice_loss_final
     
diff --git a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc
index 7705fe67..24b40734 100644
Binary files a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc
index 88362d2a..a7f07978 100644
Binary files a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc and b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc
index 2432cf8b..df12e243 100644
Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc
index a204c895..edf54dd4 100644
Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc
index 552b5b3c..c39ea97c 100644
Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc
index c79a215a..b982e786 100644
Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc
index 246f0afc..f290d977 100644
Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/decoder/detr_transformer.py b/models/official/projects/maskformer/modeling/decoder/detr_transformer.py
index fffb654e..3d9ebe52 100644
--- a/models/official/projects/maskformer/modeling/decoder/detr_transformer.py
+++ b/models/official/projects/maskformer/modeling/decoder/detr_transformer.py
@@ -64,7 +64,6 @@ def call(self, inputs):
     target_shape = tf.shape(targets)
     
     if mask is not None:
-      
       cross_attention_mask = tf.tile(
           tf.expand_dims(mask, axis=1), [1, target_shape[1], 1])
       self_attention_mask=tf.ones(
diff --git a/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc
index a5818e23..b67a3373 100644
Binary files a/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc b/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc
index 8ebb1f1e..2905ea77 100644
Binary files a/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc and b/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/modeling/layers/nn_block.py b/models/official/projects/maskformer/modeling/layers/nn_block.py
index 7f7da5d1..0ad8f17b 100644
--- a/models/official/projects/maskformer/modeling/layers/nn_block.py
+++ b/models/official/projects/maskformer/modeling/layers/nn_block.py
@@ -1,4 +1,7 @@
 import tensorflow as tf
+from official.modeling import tf_utils
+from official.nlp.modeling import layers
+from official.nlp.modeling import models
 
 '''
 Transformer Parameters:
@@ -74,6 +77,8 @@ def build(self, input_shape):
                 # Final Layer
                 self._layers.append(
                     tf.keras.layers.Dense(dim[1], activation=None))
+                    # kernel_initializer=tf_utils.clone_initializer(tf.keras.initializers.get('glorot_uniform')), 
+                    # bias_initializer=tf_utils.clone_initializer(tf.keras.initializers.get('glorot_uniform')))
 
     def call(self, x):
         for layer in self._layers:
diff --git a/models/official/projects/maskformer/modeling/maskformer.py b/models/official/projects/maskformer/modeling/maskformer.py
index 7a552baf..9851793c 100644
--- a/models/official/projects/maskformer/modeling/maskformer.py
+++ b/models/official/projects/maskformer/modeling/maskformer.py
@@ -1,5 +1,6 @@
 import tensorflow as tf
-
+import numpy as np
+import os
 from official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer
 from official.projects.maskformer.modeling.layers.nn_block import MLPHead
 from official.projects.maskformer.modeling.decoder.transformer_pixel_decoder import TransformerFPN
@@ -150,6 +151,7 @@ def process_feature_maps(self, maps):
 	def call(self, image, training = False):
 		backbone_feature_maps = self._backbone(image)
 		backbone_feature_maps_procesed = self.process_feature_maps(backbone_feature_maps)
+
 		if self._pixel_decoder == 'fpn':
 			mask_features = self.pixel_decoder(backbone_feature_maps_procesed)
 			transformer_enc_feat = backbone_feature_maps_procesed['5']
@@ -158,4 +160,4 @@ def call(self, image, training = False):
 		transformer_features = self.transformer({"features": transformer_enc_feat})
 		seg_pred = self.head({"per_pixel_embeddings" : mask_features,
 							"per_segment_embeddings": transformer_features})
-		return seg_pred
+		return seg_pred
\ No newline at end of file
diff --git a/models/official/projects/maskformer/params.yaml b/models/official/projects/maskformer/params.yaml
new file mode 100644
index 00000000..9c44ddf6
--- /dev/null
+++ b/models/official/projects/maskformer/params.yaml
@@ -0,0 +1,226 @@
+runtime:
+  all_reduce_alg: null
+  batchnorm_spatial_persistent: false
+  dataset_num_private_threads: null
+  default_shard_dim: -1
+  distribution_strategy: one_device
+  enable_xla: false
+  gpu_thread_mode: null
+  loss_scale: null
+  mixed_precision_dtype: float32
+  num_cores_per_replica: 1
+  num_gpus: 1
+  num_packs: 1
+  per_gpu_thread_count: 0
+  run_eagerly: false
+  task_index: -1
+  tpu: null
+  tpu_enable_xla_dynamic_padder: null
+  worker_hosts: null
+task:
+  allow_image_summary: false
+  bfloat16: false
+  differential_privacy_config: null
+  init_checkpoint: ''
+  init_checkpoint_modules: backbone
+  losses:
+    background_cls_weight: 0.1
+    class_offset: 0
+    l2_weight_decay: 0.0001
+  model:
+    backbone:
+      resnet:
+        bn_trainable: false
+        depth_multiplier: 1.0
+        model_id: 50
+        replace_stem_max_pool: false
+        resnetd_shortcut: false
+        scale_stem: true
+        se_ratio: 0.0
+        stem_type: v0
+        stochastic_depth_drop_rate: 0.0
+      type: resnet
+    backbone_endpoint_name: '5'
+    detr_encoder_layers: 0
+    fpn_encoder_layers: 6
+    hidden_size: 256
+    input_size: [640, 640, 3]
+    norm_activation:
+      activation: relu
+      norm_epsilon: 0.001
+      norm_momentum: 0.99
+      use_sync_bn: true
+    num_classes: 133
+    num_decoder_layers: 6
+    num_queries: 100
+    which_pixel_decoder: transformer_fpn
+  name: null
+  panoptic_quality_evaluator:
+    ignored_label: 0
+    is_thing: null
+    max_num_instances: 100
+    num_categories: 133
+    rescale_predictions: false
+  per_category_metrics: false
+  train_data:
+    apply_tf_data_service_before_batching: false
+    block_length: 1
+    cache: false
+    cycle_length: null
+    decoder:
+      simple_decoder:
+        attribute_names: []
+        mask_binarize_threshold: null
+        regenerate_source_id: false
+      type: simple_decoder
+    deterministic: null
+    drop_remainder: true
+    dtype: bfloat16
+    enable_shared_tf_data_service_between_parallel_trainers: false
+    enable_tf_data_service: false
+    file_type: tfrecord
+    global_batch_size: 2
+    input_path: /depot/davisjam/data/vishal/datasets/coco/tfrecords/train*
+    is_training: true
+    parser:
+      aspect_ratio_range: !!python/tuple
+      - 0.5
+      - 2.0
+      aug_rand_hflip: true
+      aug_scale_max: 1.0
+      aug_scale_min: 1.0
+      dtype: bfloat16
+      groundtruth_padded_size: [640, 640]
+      ignore_label: 0
+      max_retry: 50
+      min_overlap_params: !!python/tuple
+      - 0.0
+      - 1.4
+      - 0.2
+      - 0.1
+      min_scale: 0.3
+      output_size: [640, 640]
+      pad_output: true
+      resize_eval_groundtruth: true
+      seed: 2045
+    prefetch_buffer_size: null
+    regenerate_source_id: false
+    seed: null
+    sharding: true
+    shuffle_buffer_size: 1000
+    tf_data_service_address: null
+    tf_data_service_job_name: null
+    tfds_as_supervised: false
+    tfds_data_dir: ''
+    tfds_name: ''
+    tfds_skip_decoding_feature: ''
+    tfds_split: train
+    trainer_id: null
+  validation_data:
+    apply_tf_data_service_before_batching: false
+    block_length: 1
+    cache: false
+    cycle_length: null
+    decoder:
+      simple_decoder:
+        attribute_names: []
+        mask_binarize_threshold: null
+        regenerate_source_id: false
+      type: simple_decoder
+    deterministic: null
+    drop_remainder: false
+    dtype: float32
+    enable_shared_tf_data_service_between_parallel_trainers: false
+    enable_tf_data_service: false
+    file_type: tfrecord
+    global_batch_size: 1
+    input_path: /depot/davisjam/data/vishal/datasets/coco/tfrecords/val*
+    is_training: false
+    parser:
+      aspect_ratio_range: !!python/tuple
+      - 0.5
+      - 2.0
+      aug_rand_hflip: true
+      aug_scale_max: 1.0
+      aug_scale_min: 1.0
+      dtype: bfloat16
+      groundtruth_padded_size: !!python/tuple
+      - 1280
+      - 1280
+      ignore_label: 0
+      max_retry: 50
+      min_overlap_params: !!python/tuple
+      - 0.0
+      - 1.4
+      - 0.2
+      - 0.1
+      min_scale: 0.3
+      output_size: [640, 640]
+      pad_output: true
+      resize_eval_groundtruth: true
+      seed: 2045
+    prefetch_buffer_size: null
+    regenerate_source_id: false
+    seed: null
+    sharding: true
+    shuffle_buffer_size: 10000
+    tf_data_service_address: null
+    tf_data_service_job_name: null
+    tfds_as_supervised: false
+    tfds_data_dir: ''
+    tfds_name: ''
+    tfds_skip_decoding_feature: ''
+    tfds_split: train
+    trainer_id: null
+trainer:
+  allow_tpu_summary: false
+  best_checkpoint_eval_metric: ''
+  best_checkpoint_export_subdir: best_ckpt
+  best_checkpoint_metric_comp: higher
+  checkpoint_interval: 59143
+  continuous_eval_timeout: 3600
+  eval_tf_function: true
+  eval_tf_while_loop: false
+  loss_upper_bound: 1000000.0
+  max_to_keep: 3
+  optimizer_config:
+    ema: null
+    learning_rate:
+      stepwise:
+        boundaries: [39428]
+        name: PiecewiseConstantDecay
+        offset: 0
+        values: [0.0001, 1.0e-05]
+      type: stepwise
+    optimizer:
+      maskformer_adamw:
+        amsgrad: false
+        beta_1: 0.9
+        beta_2: 0.999
+        clipnorm: null
+        clipvalue: null
+        epsilon: 1.0e-07
+        exclude_from_weight_decay: null
+        global_clipnorm: 0.1
+        gradient_clip_norm: 0.0
+        include_in_weight_decay: null
+        name: AdamWeightDecay
+        weight_decay_rate: 0.0001
+      type: maskformer_adamw
+    warmup:
+      linear:
+        name: linear
+        warmup_learning_rate: 0.0
+        warmup_steps: 640
+      type: linear
+  preemption_on_demand_checkpoint: true
+  recovery_begin_steps: 0
+  recovery_max_trials: 0
+  steps_per_loop: 59143
+  summary_interval: 59143
+  train_steps: 59143
+  train_tf_function: true
+  train_tf_while_loop: true
+  validation_interval: 59143
+  validation_steps: 5000
+  validation_summary_subdir: validation
diff --git a/models/official/projects/maskformer/tasks/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/tasks/__pycache__/__init__.cpython-39.pyc
index bdf3c9f3..a4884cab 100644
Binary files a/models/official/projects/maskformer/tasks/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/tasks/__pycache__/__init__.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc b/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc
index ff6b0bf5..7ab91994 100644
Binary files a/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc and b/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc differ
diff --git a/models/official/projects/maskformer/tasks/panoptic_maskformer.py b/models/official/projects/maskformer/tasks/panoptic_maskformer.py
index 1ad9763a..5567de84 100644
--- a/models/official/projects/maskformer/tasks/panoptic_maskformer.py
+++ b/models/official/projects/maskformer/tasks/panoptic_maskformer.py
@@ -1,7 +1,7 @@
 import os
 from absl import logging
 import tensorflow as tf
-
+import matplotlib.pyplot as plt
 from official.core import base_task
 from official.core import task_factory
 from official.core import train_utils
@@ -37,6 +37,35 @@ class PanopticTask(base_task.Task):
 	"""
 	def build_model(self):
 		"""Builds MaskFormer Model."""
+
+		self.class_ids = {}
+		self.plot_collection = {} 
+		self.plot_collection_labels = {0:[]}
+		self.temp = 0	
+		self.background_empty_mask = {}
+		self.labelled_empty_mask = {}
+		self.background_non_empty_mask = {}
+		self.class_id_counts = {}
+		self.log_dir = os.environ.get('LOG_DIR')
+		self.run_number = os.environ.get('RUN_NUMBER')
+
+		if self.log_dir:
+			try: 
+				os.mkdir(self.log_dir)
+			except: 
+				pass 
+			os.mkdir(os.path.join(self.log_dir, self.run_number)) # If there is existing, then throw error
+			self.log_dir = os.path.join(self.log_dir, self.run_number)
+
+			with open(os.path.join(self.log_dir, 'checking_labels.txt'), 'w') as file:
+				pass
+			
+			with open(os.path.join(self.log_dir, 'settings.txt'), 'w') as file:
+				file.write("RUN: " + str(os.environ.get('RUN_NUMBER')) + '\n')
+				file.write("BSIZE: " + str(os.environ.get('TRAIN_BATCH_SIZE'))+ '\n')
+				file.write("BASE_LR: " + str(os.environ.get('BASE_LR'))+ '\n')
+				file.write("NO_OBJ_CLS_WEIGHT: " + str(os.environ.get('NO_OBJ_CLS_WEIGHT'))+ '\n')
+			
 		logging.info('Building MaskFormer model.')
 		input_specs = tf.keras.layers.InputSpec(shape=[None] + self._task_config.model.input_size)
 		
@@ -58,14 +87,14 @@ def build_model(self):
 		logging.info('Maskformer model build successful.')
 		inputs = tf.keras.Input(shape=input_specs.shape[1:])
 		model(inputs)
-		
+		model.summary() 
 		return model
 
 	def initialize(self, model: tf.keras.Model) -> None:
 		"""
 		Used to initialize the models with checkpoint
 		"""
-		
+
 		logging.info('Initializing model from checkpoint: %s', self._task_config.init_checkpoint)
 		if not self._task_config.init_checkpoint:
 			return
@@ -185,8 +214,139 @@ def build_metrics(self, training=True):
 			)
 		return metrics
 		
+	def _log_classes(self, labels: Dict[str, Any]) -> List[Dict[int, int]]:
+		""" 
+		Logs all the class IDs viewed during training and evaluation.
 		
+		Returns: 
+		A list with one dictionary per image in the batch, mapping each class id to its count in that image
+		"""
+
+		all_unique_ids = labels["unique_ids"]._numpy()
+		classes_in_batch = []
+		for size in range(all_unique_ids.shape[0]):
+			unique_ids = all_unique_ids[size, :]
+			classes_in_image = {}
+			for class_id in unique_ids: 
+				if class_id in classes_in_image: 
+					classes_in_image[class_id] += 1
+				else: 
+					classes_in_image[class_id] = 1
+			classes_in_batch.append(classes_in_image)
+
+			for class_id in unique_ids: 
+				if class_id in self.class_ids: 
+					self.class_ids[class_id] += 1
+				else: 
+					self.class_ids[class_id] = 1
+
+		return classes_in_batch
+
+	def _check_contigious_mask(self, labels: Dict[str, Any]):
+		"""	
+		Checks if all the contiguous masks are mapped properly from the category masks 
+
+		Returns:
+		EagerTensor with correctly mapped contiguous masks
+		"""
+		mapping_dict  = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, \
+		19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, 36: 32, 37: 33, 38: 34, \
+		39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, 46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, 54: 49, 55: 50, 56: 51, 57: 52, \
+		58: 53, 59: 54, 60: 55, 61: 56, 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, \
+		80: 71, 81: 72, 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80, 92: 81, 93: 82, 95: 83, 100: 84, 107: 85, 109: 86, 112: 87, \
+		118: 88, 119: 89, 122: 90, 125: 91, 128: 92, 130: 93, 133: 94, 138: 95, 141: 96, 144: 97, 145: 98, 147: 99, 148: 100, 149: 101, 151: 102, \
+		154: 103, 155: 104, 156: 105, 159: 106, 161: 107, 166: 108, 168: 109, 171: 110, 175: 111, 176: 112, 177: 113, 178: 114, 180: 115, 181: 116, \
+		184: 117, 185: 118, 186: 119, 187: 120, 188: 121, 189: 122, 190: 123, 191: 124, 192: 125, 193: 126, 194: 127, 195: 128, 196: 129, 197: 130, \
+		198: 131, 199: 132, 200: 133}
+
+		category_mask = labels["category_mask"]._numpy()
+		contigious_mask = labels["contigious_mask"]._numpy()
+
+		for size in range(category_mask.shape[0]):
+			cat = category_mask[size]
+			cont = contigious_mask[size, :, :, :]
+			mapped_cat = np.expand_dims(np.array([[mapping_dict.get(int(x), int(x)) for x in row] for row in cat]), axis=-1)
+			if not np.array_equal(mapped_cat, cont): 
+				contigious_mask[size, :, :, :] = mapped_cat
+			
+		return tf.convert_to_tensor(contigious_mask)
+
+	def _check_induvidual_masks(self, labels: Dict[str, Any], class_id_counts: List[Dict[int, int]]):
+		"""
+		Counts, per image, the individual masks that have a non-zero class id but are entirely zero, and logs the counts
+
+		Returns:
+		Tensor holding the individual masks (converted back from NumPy, values unmodified)
+		"""
+
+		# mapping_dict  = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, \
+		# 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 27: 25, 28: 26, 31: 27, 32: 28, 33: 29, 34: 30, 35: 31, 36: 32, 37: 33, 38: 34, \
+		# 39: 35, 40: 36, 41: 37, 42: 38, 43: 39, 44: 40, 46: 41, 47: 42, 48: 43, 49: 44, 50: 45, 51: 46, 52: 47, 53: 48, 54: 49, 55: 50, 56: 51, 57: 52, \
+		# 58: 53, 59: 54, 60: 55, 61: 56, 62: 57, 63: 58, 64: 59, 65: 60, 67: 61, 70: 62, 72: 63, 73: 64, 74: 65, 75: 66, 76: 67, 77: 68, 78: 69, 79: 70, \
+		# 80: 71, 81: 72, 82: 73, 84: 74, 85: 75, 86: 76, 87: 77, 88: 78, 89: 79, 90: 80, 92: 81, 93: 82, 95: 83, 100: 84, 107: 85, 109: 86, 112: 87, \
+		# 118: 88, 119: 89, 122: 90, 125: 91, 128: 92, 130: 93, 133: 94, 138: 95, 141: 96, 144: 97, 145: 98, 147: 99, 148: 100, 149: 101, 151: 102, \
+		# 154: 103, 155: 104, 156: 105, 159: 106, 161: 107, 166: 108, 168: 109, 171: 110, 175: 111, 176: 112, 177: 113, 178: 114, 180: 115, 181: 116, \
+		# 184: 117, 185: 118, 186: 119, 187: 120, 188: 121, 189: 122, 190: 123, 191: 124, 192: 125, 193: 126, 194: 127, 195: 128, 196: 129, 197: 130, \
+		# 198: 131, 199: 132, 200: 133}
+
+		induvidual_masks = labels["individual_masks"]._numpy()
+		# contig_mask = labels["contigious_mask"]._numpy().copy()
+		# instance_mask = labels["instance_mask"]._numpy().copy()
+		# zero_mask = np.zeros((induvidual_masks.shape[2], induvidual_masks.shape[3]), dtype=induvidual_masks.dtype)
+		class_ids = labels["unique_ids"]._numpy().copy()
+
+			# induvidual_masks_in_image = induvidual_masks[size, :, :, :, :]
+			# instance_mask_in_image = instance_mask[size, :, :, :]
+			# contig_mask_in_image = contig_mask[size, :, :, :]
+			# combined_mask = np.array([[tuple((contig_mask_in_image[i, j], instance_mask_in_image[i, j])) for j in range(contig_mask_in_image.shape[1])] for i in range(contig_mask_in_image.shape[0])])
+			
+			# with open('/depot/davisjam/data/akshath/exps/tf/indu_masks/indu_masks.txt', 'w') as file: 
+			# 	file.write(str(combined_mask) + '\n')
+			# 	file.write(str(np.unique(combined_mask, axis=0)) + '\n')
+
+			# for a in np.unique(instance_mask_in_image): 
+			# 	plt.imshow(instance_mask_in_image == a)
+			# 	plt.savefig(f'/depot/davisjam/data/akshath/exps/tf/indu_masks/my_image__{size}_{a}.png')
+
+			# unique_ids = class_ids[size, :]
+			# # np.save('/depot/davisjam/data/akshath/exps/tf/indu_masks/instance.npy', instance_mask_in_image)
+			# return 
+			# for i, class_id in enumerate(unique_ids):
+			# 	if class_id != 0:
+			# 		print(class_id)		
+					# instance_mask_in_image[instance_mask_in_image == i]	
+					# if induvidual_masks_in_image[i,:,:,:]
+				# if not np.all((induvidual_masks_in_image[i,:,:,:] == 0) | (induvidual_masks_in_image[i,:,:,:] == mapped_id)):
+					# induvidual_masks_in_image[i, :, :, :] = np.array([[mapped_id for x in row] for row in induvidual_masks_in_image[i, :, :, :]])
+
+		for size in range(len(class_ids)): 
+
+			# background_non_empty_mask = 0 
+			labelled_empty_mask = 0 
+			# background_empty_mask = 0
+
+			for i, mask in enumerate(induvidual_masks[size, :, :, :, :]):
+				if class_ids[size][i] != 0:
+					if np.all(mask == 0): 
+						labelled_empty_mask += 1
+						class_ids[size][i] = 0						
+
+			self.labelled_empty_mask[self.temp] = labelled_empty_mask
+
+			with open(os.path.join(self.log_dir, 'background_empty_mask.txt'), 'w') as file: 
+				file.write(str(self.background_empty_mask) + '\n')
+			with open(os.path.join(self.log_dir, 'labelled_empty_mask.txt'), 'w') as file: 
+				file.write(str(self.labelled_empty_mask) + '\n')
+			with open(os.path.join(self.log_dir, 'background_non_empty_mask.txt'), 'w') as file: 
+				file.write(str(self.background_non_empty_mask) + '\n')
+			with open(os.path.join(self.log_dir, 'class_id_counts.txt'), 'w') as file: 
+				file.write(str(self.class_id_counts) + '\n')
+			with open(os.path.join(self.log_dir, 'class_ids.txt'), 'w') as file: 
+				file.write(str(self.class_ids) + '\n')
 		
+		return tf.convert_to_tensor(induvidual_masks)
+
+
 	def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None) -> Dict[str, Any]:
 		"""
 		Does forward and backward.
@@ -202,8 +362,55 @@ def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: t
 		"""
 						
 		features, labels = inputs
+
+		# features = tf.convert_to_tensor(np.load('/depot/davisjam/data/akshath/exps/resnet/raw/features.npy')) 
+		# for val in labels: 
+		# 	labels[val] = tf.convert_to_tensor(np.load(f'/depot/davisjam/data/akshath/exps/resnet/raw/{val}.npy'))
+
+		# np.save('/depot/davisjam/data/akshath/exps/tf/resnet/raw/features.npy', tf.cast(features, np.float32)._numpy())
+		# for lab in labels: 
+			# np.save(f'/depot/davisjam/data/akshath/exps/tf/resnet/raw/{lab}.npy', tf.cast(labels[lab], np.float32)._numpy())
+
+
+		# self.temp += 2
+		# all_unique_ids = labels["unique_ids"]._numpy()
+		# for size in range(all_unique_ids.shape[0]):
+		# 	unique_ids = all_unique_ids[size, :]
+		# 	for class_id in unique_ids: 
+		# 		if class_id in self.class_ids: 
+		# 			self.class_ids[class_id] += 1
+		# 		else: 
+		# 			self.class_ids[class_id] = 1
+
+		# print(self.temp)
+		# with open(os.path.join(self.log_dir, 'class_ids.txt'), 'w') as file: 
+			# file.write(str(self.class_ids) + '\n')
+
+		# self._log_classes(labels)
+		# labels["individual_masks"] = self._check_induvidual_masks(labels, self._log_classes(labels))
+
+		# # for param in model.trainable_variables:
+		# # 	name = param.name.replace('/', '-')
+		# # 	np.save(f"/depot/davisjam/data/akshath/exps/tf/weights_biases/{name}.npy", param.numpy())  
+
+		# # with open('/depot/davisjam/data/akshath/exps/tf/indu_masks/indu_masks.txt', 'w') as file: 
+		# # 	file.write(str(labels) + '\n')
+
+
+		# # raise ValueError('Init') 
+		
+		# # labels["individual_masks"] = self._check_induvidual_masks(labels, self._log_classes(labels))
+		# # labels["contigious_mask"] = self._check_contigious_mask(labels)
+
 		with tf.GradientTape() as tape:
 			outputs = model(features, training=True)
+			# print(backbone_feature_maps_procesed.keys())
+			
+			# for val in backbone_feature_maps_procesed: 
+			# 	print(backbone_feature_maps_procesed[val])
+			# 	print(backbone_feature_maps_procesed[val].numpy())
+			# 	np.save(os.path.join('/depot/davisjam/data/akshath/exps/resnet/tf', 'backbone_feature_maps_procesed_' + str(val) + '.npy'), backbone_feature_maps_procesed[val].numpy())
+
 			##########################################################
 			# FIXME : This loop must be used for auxilary outputs
 			loss = 0.0
@@ -226,11 +433,17 @@ def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: t
 			
 			##########################################################################
 			
-			# TODO : Add auxiallary losses
 			total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels)
 			scaled_loss = total_loss
+
 			if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
 				total_loss = optimizer.get_scaled_loss(scaled_loss)
+		
+		print('Total loss : ', total_loss)
+		print('Cls loss : ', cls_loss)
+		print('Focal loss : ', focal_loss)
+		print('Dice loss : ', dice_loss)
+
 		tvars = model.trainable_variables	
 		grads = tape.gradient(scaled_loss,tvars)
 
@@ -243,11 +456,40 @@ def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: t
 			pred_labels = tf.argmax(probs, axis=-1)
 			print("Target labels :", labels["unique_ids"])
 			print("Output labels :", pred_labels)
+
+		# temp = {} 
+		# for grad, param in zip(grads, tvars): 
+		# 	temp[param.name] = tf.norm(grad).numpy()
+
+		# for param in temp: 
+		# 	if param not in self.plot_collection: 
+		# 		self.plot_collection[param] = []
+		# 	else: 
+		# 		self.plot_collection[param]	+= [temp[param]]
+		# self.plot_collection_labels[0] += [len(np.unique(pred_labels).tolist())]
+
+		self.temp += int(os.environ.get('TRAIN_BATCH_SIZE'))
+		with open(os.path.join(self.log_dir, 'checking_labels.txt'), 'a') as file:
+			file.write(str(self.temp) + '\n')
+			file.write(str(labels["unique_ids"].numpy()) + '\n')
+			file.write(str(pred_labels.numpy())+ '\n')
+			file.write(f"{total_loss}, {cls_loss}, {focal_loss}, {dice_loss}" + '\n')
+			file.write('-----------------------------------' + '\n')
+
+		# if (sum(temp.values()) == 0) or (len(np.unique(pred_labels).tolist()) == 1 and np.unique(pred_labels).tolist()[0] == 0): 
+		# 	with open('/depot/davisjam/data/akshath/exps/tf/editing_layers/numIters.txt', 'a') as file: 
+		# 		file.write(str('numIters : ' + str(self.temp)) + '\n')
+		# 	with open('/depot/davisjam/data/akshath/exps/tf/vishal_plot/dict.txt', 'w') as file: 
+		# 		file.write(str(self.plot_collection))
+		# 	with open('/depot/davisjam/data/akshath/exps/tf/vishal_plot/dict_labels.txt', 'w') as file: 
+		# 			file.write(str(self.plot_collection_labels))
+
+		# 	raise ValueError('Stop2')
 		
 		# # Multiply for logging.
 		# # Since we expect the gradient replica sum to happen in the optimizer,
 		# # the loss is scaled with global num_boxes and weights.
-		# # To have it more interpretable/comparable we scale it back when logging.
+		# # To make the loss more interpretable/comparable, we scale it back when logging.
 		num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync
 		total_loss *= num_replicas_in_sync
 		cls_loss *= num_replicas_in_sync
diff --git a/models/official/projects/maskformer/train.py b/models/official/projects/maskformer/train.py
index a59c91d7..5eba5998 100644
--- a/models/official/projects/maskformer/train.py
+++ b/models/official/projects/maskformer/train.py
@@ -26,7 +26,6 @@
 from official.core import train_lib
 from official.core import train_utils
 from official.modeling import performance
-from cloud_tpu_client import Client
 
 from official.projects.maskformer.configs import maskformer
 from official.projects.maskformer.tasks import panoptic_maskformer
@@ -35,10 +34,13 @@
 def main(_):
 
 	if FLAGS.tpu:
+		from cloud_tpu_client import Client
 		# This is for configuring the TPU software version programatically
 		c = Client(os.environ['TPU_NAME'], zone=os.environ['TPU_ZONE'], project=os.environ['TPU_PROJECT'])
 		c.configure_tpu_version(os.environ["TPU_SOFTWARE"], restart_type='ifNeeded')
-		c.wait_for_healthy()
+		c.wait_for_healthy()	
+	else: 
+		os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
 
 
 	gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params)
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696795169.gilbreth-g000.rcac.purdue.edu.86093.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696795169.gilbreth-g000.rcac.purdue.edu.86093.0.v2
new file mode 100644
index 00000000..a23927ab
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696795169.gilbreth-g000.rcac.purdue.edu.86093.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696795544.gilbreth-g000.rcac.purdue.edu.88080.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696795544.gilbreth-g000.rcac.purdue.edu.88080.0.v2
new file mode 100644
index 00000000..797b4253
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696795544.gilbreth-g000.rcac.purdue.edu.88080.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696800260.gilbreth-g000.rcac.purdue.edu.105971.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696800260.gilbreth-g000.rcac.purdue.edu.105971.0.v2
new file mode 100644
index 00000000..ad02ab55
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696800260.gilbreth-g000.rcac.purdue.edu.105971.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696800341.gilbreth-g000.rcac.purdue.edu.106666.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696800341.gilbreth-g000.rcac.purdue.edu.106666.0.v2
new file mode 100644
index 00000000..ab649f95
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696800341.gilbreth-g000.rcac.purdue.edu.106666.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696801211.gilbreth-g000.rcac.purdue.edu.109453.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696801211.gilbreth-g000.rcac.purdue.edu.109453.0.v2
new file mode 100644
index 00000000..8f8c0d54
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696801211.gilbreth-g000.rcac.purdue.edu.109453.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696801269.gilbreth-g000.rcac.purdue.edu.110006.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696801269.gilbreth-g000.rcac.purdue.edu.110006.0.v2
new file mode 100644
index 00000000..da864f56
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696801269.gilbreth-g000.rcac.purdue.edu.110006.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696801575.gilbreth-g000.rcac.purdue.edu.111277.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696801575.gilbreth-g000.rcac.purdue.edu.111277.0.v2
new file mode 100644
index 00000000..2e836610
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696801575.gilbreth-g000.rcac.purdue.edu.111277.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696802244.gilbreth-g000.rcac.purdue.edu.113560.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696802244.gilbreth-g000.rcac.purdue.edu.113560.0.v2
new file mode 100644
index 00000000..51d8484e
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696802244.gilbreth-g000.rcac.purdue.edu.113560.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696802695.gilbreth-g000.rcac.purdue.edu.115248.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696802695.gilbreth-g000.rcac.purdue.edu.115248.0.v2
new file mode 100644
index 00000000..d0b6f1c3
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696802695.gilbreth-g000.rcac.purdue.edu.115248.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696803587.gilbreth-g000.rcac.purdue.edu.118187.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696803587.gilbreth-g000.rcac.purdue.edu.118187.0.v2
new file mode 100644
index 00000000..37850292
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696803587.gilbreth-g000.rcac.purdue.edu.118187.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696871050.gilbreth-g007.rcac.purdue.edu.96221.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696871050.gilbreth-g007.rcac.purdue.edu.96221.0.v2
new file mode 100644
index 00000000..43fa6c66
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696871050.gilbreth-g007.rcac.purdue.edu.96221.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696907351.gilbreth-g005.rcac.purdue.edu.122426.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696907351.gilbreth-g005.rcac.purdue.edu.122426.0.v2
new file mode 100644
index 00000000..2015b0fa
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696907351.gilbreth-g005.rcac.purdue.edu.122426.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696907805.gilbreth-g005.rcac.purdue.edu.128399.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696907805.gilbreth-g005.rcac.purdue.edu.128399.0.v2
new file mode 100644
index 00000000..d2080f2e
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696907805.gilbreth-g005.rcac.purdue.edu.128399.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696907918.gilbreth-g005.rcac.purdue.edu.129094.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696907918.gilbreth-g005.rcac.purdue.edu.129094.0.v2
new file mode 100644
index 00000000..a6990459
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696907918.gilbreth-g005.rcac.purdue.edu.129094.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696909420.gilbreth-g005.rcac.purdue.edu.3453.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696909420.gilbreth-g005.rcac.purdue.edu.3453.0.v2
new file mode 100644
index 00000000..f5cf9d4f
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696909420.gilbreth-g005.rcac.purdue.edu.3453.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696913554.gilbreth-g005.rcac.purdue.edu.17701.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696913554.gilbreth-g005.rcac.purdue.edu.17701.0.v2
new file mode 100644
index 00000000..268bcb99
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696913554.gilbreth-g005.rcac.purdue.edu.17701.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696913908.gilbreth-k019.rcac.purdue.edu.98368.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696913908.gilbreth-k019.rcac.purdue.edu.98368.0.v2
new file mode 100644
index 00000000..6b2bb167
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696913908.gilbreth-k019.rcac.purdue.edu.98368.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696914015.gilbreth-k019.rcac.purdue.edu.99870.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696914015.gilbreth-k019.rcac.purdue.edu.99870.0.v2
new file mode 100644
index 00000000..a1fc99c5
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696914015.gilbreth-k019.rcac.purdue.edu.99870.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1696915048.gilbreth-k019.rcac.purdue.edu.118639.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1696915048.gilbreth-k019.rcac.purdue.edu.118639.0.v2
new file mode 100644
index 00000000..ab8e9f24
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1696915048.gilbreth-k019.rcac.purdue.edu.118639.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1699527985.gilbreth-k012.rcac.purdue.edu.121017.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1699527985.gilbreth-k012.rcac.purdue.edu.121017.0.v2
new file mode 100644
index 00000000..e3664331
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1699527985.gilbreth-k012.rcac.purdue.edu.121017.0.v2 differ
diff --git a/models/official/projects/maskformer/train/events.out.tfevents.1699567985.gilbreth-fe00.rcac.purdue.edu.57387.0.v2 b/models/official/projects/maskformer/train/events.out.tfevents.1699567985.gilbreth-fe00.rcac.purdue.edu.57387.0.v2
new file mode 100644
index 00000000..128ada6a
Binary files /dev/null and b/models/official/projects/maskformer/train/events.out.tfevents.1699567985.gilbreth-fe00.rcac.purdue.edu.57387.0.v2 differ
diff --git a/models/official/projects/maskformer/train_on_cpu.sh b/models/official/projects/maskformer/train_on_cpu.sh
index 0f406729..b4f46db8 100755
--- a/models/official/projects/maskformer/train_on_cpu.sh
+++ b/models/official/projects/maskformer/train_on_cpu.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 train_bsize=1
 eval_bsize=1
-export PYTHONPATH=$PYTHONPATH:~/tf-maskformer/models
+export PYTHONPATH=$PYTHONPATH:/depot/davisjam/data/akshath/MaskFormer_tf/tf-maskformer/models
 export RESNET_CKPT="gs://cam2-models/maskformer_vishal_exps/resnet50_pretrained/tfmg/ckpt-62400"
 export MODEL_DIR="gs://cam2-models/maskformer_vishal_exps/EXP01_CPU"
 export TFRECORDS_DIR="gs://cam2-datasets/coco_panoptic/tfrecords"
@@ -15,7 +15,7 @@ export OVERRIDES="runtime.distribution_strategy=one_device,runtime.mixed_precisi
 task.train_data.global_batch_size=$TRAIN_BATCH_SIZE,\
 task.model.which_pixel_decoder=transformer_fpn,\
 task.init_checkpoint=$RESNET_CKPT"
-python3 models/official/projects/maskformer/train.py \
+python3 train.py \
   --experiment maskformer_coco_panoptic \
   --mode train \
   --model_dir $MODEL_DIR \
diff --git a/models/official/projects/maskformer/train_on_gpu.sh b/models/official/projects/maskformer/train_on_gpu.sh
index 81c8a16b..5cf44424 100755
--- a/models/official/projects/maskformer/train_on_gpu.sh
+++ b/models/official/projects/maskformer/train_on_gpu.sh
@@ -1,28 +1,30 @@
 #!/bin/bash
-# module load gcc/9.3.0 
-# cd /depot/qqiu/data/vishal/tf-maskformer/
-# conda activate /depot/qqiu/data/vishal/envs/tmaskformer/
-# module load anaconda/2020.11-py38
-# module load cuda/11.7.0 cudnn/cuda-11.7_8.6 gcc/6.3.0
-train_bsize=8
-eval_bsize=8
-export PYTHONPATH=$PYTHONPATH:/depot/qqiu/data/vishal/tf-maskformer/models
+
+train_bsize=2
+eval_bsize=1
+export PYTHONPATH=/depot/davisjam/data/akshath/MaskFormer_tf/tf-maskformer/models
 export MODEL_DIR="./"
-export RESNET_CKPT="/depot/davisjam/data/vishal/pretrained_ckpts/tfmg_resnet50/ckpt-62400"
 export TFRECORDS_DIR="/depot/davisjam/data/vishal/datasets/coco/tfrecords"
 export TRAIN_BATCH_SIZE=$train_bsize
 export EVAL_BATCH_SIZE=$eval_bsize
-export BASE_LR=0.00005
+export BASE_LR=0.0001
 export NO_OBJ_CLS_WEIGHT=0.1
 export IMG_SIZE=640
 export PRINT_OUTPUTS=True
+# Akshath
+export ON_GPU=True
+export MODE="train"
+export LOG_DIR="/depot/davisjam/data/akshath/exps/focal"
+export RUN_NUMBER=1
+export USE_BFLOAT16_DATA=False
+# export STEPS_PER_EPOCH=10
+export NUM_EPOCH=1
+# export VAL_STEPS=20
 export OVERRIDES="runtime.distribution_strategy=one_device,runtime.num_gpus=1,runtime.mixed_precision_dtype=float32,\
 task.train_data.global_batch_size=$train_bsize,\
-task.model.which_pixel_decoder=transformer_fpn,\
-task.init_checkpoint=$RESNET_CKPT"
-python3 models/official/projects/maskformer/train.py \
+task.model.which_pixel_decoder=transformer_fpn"
+python3 train.py \
   --experiment maskformer_coco_panoptic \
-  --mode eval \
+  --mode $MODE \
   --model_dir $MODEL_DIR \
-  --params_override=$OVERRIDES
-
+  --params_override=$OVERRIDES