diff --git a/category_mask.npy b/category_mask.npy
new file mode 100644
index 00000000..1ad094d5
Binary files /dev/null and b/category_mask.npy differ
diff --git a/img.npy b/img.npy
new file mode 100644
index 00000000..72ca0b47
Binary files /dev/null and b/img.npy differ
diff --git a/individual_masks.npy b/individual_masks.npy
new file mode 100644
index 00000000..65900a4d
Binary files /dev/null and b/individual_masks.npy differ
diff --git a/instance_mask.npy b/instance_mask.npy
new file mode 100644
index 00000000..a9a691dc
Binary files /dev/null and b/instance_mask.npy differ

[Binary diffs not shown: dozens of compiled Python caches under models/official/**/__pycache__/ (*.cpython-38.pyc added, *.cpython-39.pyc updated) also change throughout this patch.]

diff --git a/models/official/common/distribute_utils.py b/models/official/common/distribute_utils.py
index 58d63038..19d417be 100644
--- a/models/official/common/distribute_utils.py
+++ b/models/official/common/distribute_utils.py
@@ -82,8 +82,9 @@ def tpu_initialize(tpu_address):
   Returns:
     A TPUClusterResolver.
   """
+
   cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
-      tpu=tpu_address)
+      tpu=tpu_address, project=os.environ["TPU_PROJECT"], zone=os.environ["TPU_ZONE"])
   if tpu_address not in ("", "local"):
     tf.config.experimental_connect_to_cluster(cluster_resolver)
     tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
diff --git a/models/official/core/actions.py b/models/official/core/actions.py
index 5a092b8a..7b4f4195 100644
--- a/models/official/core/actions.py
+++ b/models/official/core/actions.py
@@ -222,15 +222,15 @@ def get_train_actions(
     )
     train_actions.append(recover_action)

-  if (
-      params.trainer.preemption_on_demand_checkpoint
-      and trainer.strategy.cluster_resolver
-  ):
-    on_demand_checkpoint_action = orbit.actions.SaveCheckpointIfPreempted(
-        trainer.strategy.cluster_resolver,
-        checkpoint_manager,
-        trainer.global_step,
-        keep_running_after_save=True,
-    )
-    train_actions.append(on_demand_checkpoint_action)
+  # if (
+  #     params.trainer.preemption_on_demand_checkpoint
+  #     and trainer.strategy.cluster_resolver
+  # ):
+  #   on_demand_checkpoint_action = orbit.actions.SaveCheckpointIfPreempted(
+  #       trainer.strategy.cluster_resolver,
+  #       checkpoint_manager,
+  #       trainer.global_step,
+  #       keep_running_after_save=True,
+  #   )
+  #   train_actions.append(on_demand_checkpoint_action)

   return train_actions
diff --git a/models/official/core/base_trainer.py b/models/official/core/base_trainer.py
index a341d128..9d31ce2d 100644
--- a/models/official/core/base_trainer.py
+++ b/models/official/core/base_trainer.py
@@ -335,6 +335,7 @@ def train_loop_end(self):
     # Maybe a self-implemented optimizer does not have `optimizer.iterations`.
     # So just to be safe here.
     if hasattr(self.optimizer, "iterations"):
+
       logs["learning_rate"] = self.optimizer.learning_rate(
           self.optimizer.iterations)
     else:
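The base_trainer.py hunk touches the line that logs the learning rate by calling `self.optimizer.learning_rate` with the current step, which works when the optimizer was built with a `tf.keras.optimizers.schedules.LearningRateSchedule`. A small sketch of that pattern; the schedule and values here are illustrative, not taken from this diff:

import tensorflow as tf

schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[369600], values=[1e-4, 1e-5])  # a stepwise schedule, as in the DETR script below
optimizer = tf.keras.optimizers.Adam(learning_rate=schedule)

# A LearningRateSchedule is callable with a step; evaluating it at the
# optimizer's current iteration is what train_loop_end() records.
current_lr = schedule(optimizer.iterations)
print(float(current_lr))  # 1e-4 until step 369600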
diff --git a/models/official/projects/detr/dataloaders/coco.py b/models/official/projects/detr/dataloaders/coco.py
index cf0835b8..54cf4bb8 100644
--- a/models/official/projects/detr/dataloaders/coco.py
+++ b/models/official/projects/detr/dataloaders/coco.py
@@ -116,7 +116,7 @@ def preprocess(self, inputs):
                 classes, self._params.max_num_boxes),
         'boxes':
             preprocess_ops.clip_or_pad_to_fixed_size(
-                boxes, self._params.max_num_boxes)
+                boxes, self._params.max_num_boxes)  # [4, 100]
     }
     if not self._params.is_training:
       labels.update({
diff --git a/models/official/projects/detr/experiments/detr_r50_300epochs.sh b/models/official/projects/detr/experiments/detr_r50_300epochs.sh
index 162f9743..8fb91a63 100644
--- a/models/official/projects/detr/experiments/detr_r50_300epochs.sh
+++ b/models/official/projects/detr/experiments/detr_r50_300epochs.sh
@@ -2,5 +2,6 @@
 python3 official/projects/detr/train.py \
   --experiment=detr_coco \
   --mode=train_and_eval \
-  --model_dir=/tmp/logging_dir/ \
+  --model_dir="./" \
   --params_override=task.init_checkpoint='gs://tf_model_garden/vision/resnet50_imagenet/ckpt-62400',trainer.train_steps=554400,trainer.optimizer_config.learning_rate.stepwise.boundaries="[369600]"
+
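For context on the `# [4, 100]` note in the coco.py hunk above: `preprocess_ops.clip_or_pad_to_fixed_size` truncates or zero-pads the box tensor along its first axis so every example carries exactly `max_num_boxes` rows. A rough, illustrative re-implementation (not the model-garden helper itself):

import tensorflow as tf

def clip_or_pad_to_fixed_size(t, size, constant_values=0):
  # Keep at most `size` rows, then pad with `constant_values` up to `size`.
  t = t[:size]
  pad_rows = size - tf.shape(t)[0]
  paddings = [[0, pad_rows]] + [[0, 0]] * (len(t.shape) - 1)
  return tf.pad(t, paddings, constant_values=constant_values)

boxes = tf.random.uniform([7, 4])               # 7 ground-truth boxes
padded = clip_or_pad_to_fixed_size(boxes, 100)
print(padded.shape)                             # (100, 4)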
""" + _, num_elems, _ = tf_utils.get_shape_list(adj_matrix, expected_rank=3) adj_matrix = tf.transpose(adj_matrix, [1, 0, 2]) diff --git a/models/official/projects/detr/ops/matchers_test.py b/models/official/projects/detr/ops/matchers_test.py index 09b12e1f..87e27430 100644 --- a/models/official/projects/detr/ops/matchers_test.py +++ b/models/official/projects/detr/ops/matchers_test.py @@ -18,9 +18,8 @@ from scipy import optimize import tensorflow as tf -from official.projects.detr.ops import matchers - - +# from official.projects.detr.ops import matchers +import matchers class MatchersOpsTest(tf.test.TestCase): def testLinearSumAssignment(self): diff --git a/models/official/projects/detr/tasks/__pycache__/detection.cpython-39.pyc b/models/official/projects/detr/tasks/__pycache__/detection.cpython-39.pyc new file mode 100644 index 00000000..b4aac805 Binary files /dev/null and b/models/official/projects/detr/tasks/__pycache__/detection.cpython-39.pyc differ diff --git a/models/official/projects/detr/tasks/detection.py b/models/official/projects/detr/tasks/detection.py index 55806d0c..b247df0a 100644 --- a/models/official/projects/detr/tasks/detection.py +++ b/models/official/projects/detr/tasks/detection.py @@ -152,7 +152,7 @@ def _compute_cost(self, cls_outputs, box_outputs, cls_targets, box_targets): tf.cast(tf.not_equal(cls_targets, 0), dtype=total_cost.dtype), axis=1) total_cost = (1 - valid) * max_cost + valid * total_cost - # Set inf of nan to large constant + # Set inf or nan to large constant total_cost = tf.where( tf.logical_or(tf.math.is_nan(total_cost), tf.math.is_inf(total_cost)), max_cost * tf.ones_like(total_cost, dtype=total_cost.dtype), @@ -176,7 +176,7 @@ def build_losses(self, outputs, labels, aux_losses=None): target_index = tf.math.argmax(indices, axis=1) cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1) box_assigned = tf.gather(box_outputs, target_index, batch_dims=1, axis=1) - + background = tf.equal(cls_targets, 0) num_boxes = tf.reduce_sum( tf.cast(tf.logical_not(background), tf.float32), axis=-1) diff --git a/models/official/projects/maskformer/.gitignore b/models/official/projects/maskformer/.gitignore new file mode 100644 index 00000000..d40353df --- /dev/null +++ b/models/official/projects/maskformer/.gitignore @@ -0,0 +1,4 @@ +.gitignore +myreadme.md +ckpts/ +.npy \ No newline at end of file diff --git a/models/official/projects/maskformer/.ipynb_checkpoints/testing-checkpoint.ipynb b/models/official/projects/maskformer/.ipynb_checkpoints/testing-checkpoint.ipynb new file mode 100644 index 00000000..7fec5150 --- /dev/null +++ b/models/official/projects/maskformer/.ipynb_checkpoints/testing-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/models/official/projects/maskformer/__pycache__/__init__.cpython-38.pyc b/models/official/projects/maskformer/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..55d3bd5d Binary files /dev/null and b/models/official/projects/maskformer/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc index 00b49200..cbfe52ca 100644 Binary files a/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc differ diff --git 
a/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc b/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc new file mode 100644 index 00000000..99998d36 Binary files /dev/null and b/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/configs/.ipynb_checkpoints/maskformer-checkpoint.py b/models/official/projects/maskformer/configs/.ipynb_checkpoints/maskformer-checkpoint.py new file mode 100644 index 00000000..8338d6c8 --- /dev/null +++ b/models/official/projects/maskformer/configs/.ipynb_checkpoints/maskformer-checkpoint.py @@ -0,0 +1,212 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MaskFormer configurations.""" + +import dataclasses +import os +from typing import List, Optional, Union + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.vision.configs import backbones +from official.vision.configs import common +from official.projects.maskformer import optimization + + +@dataclasses.dataclass +class Parser(hyperparams.Config): + """Config definitions for parser""" + output_size: List[int] = None + min_scale: float = 0.3 + aspect_ratio_range: List[float] = (0.5, 2.0) + min_overlap_params: List[float] = (0.0, 1.4, 0.2, 0.1) + max_retry: int = 50 + pad_output: bool = False + resize_eval_groundtruth: bool = True + groundtruth_padded_size: Optional[List[int]] = None + ignore_label: int = 0 + aug_rand_hflip: bool = True + aug_scale_min: float = 1.0 + aug_scale_max: float = 1.0 + color_aug_ssd: bool = False + brightness: float = 0.2 + saturation: float = 0.3 + contrast: float = 0.5 + aug_type: Optional[common.Augmentation] = None + sigma: float = 8.0 + small_instance_area_threshold: int = 4096 + small_instance_weight: float = 3.0 + dtype: str = 'float32' + seed: int = None + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + input_path: str = '' + tfds_name: str = '' + tfds_split: str = 'train' + global_batch_size: int = 0 + is_training: bool = False + regenerate_source_id: bool = False + # TODO : Change the dtype to bloat16 for TPU training + dtype: str = 'bfloat16' + decoder: common.DataDecoder = common.DataDecoder() + shuffle_buffer_size: int = 10000 + file_type: str = 'tfrecord' + drop_remainder: bool = True + parser: Parser = Parser() + + +@dataclasses.dataclass +class Losses(hyperparams.Config): + # TODO update these for maskformer + class_offset: int = 0 + lambda_cls: float = 1.0 + lambda_box: float = 5.0 + lambda_giou: float = 2.0 + background_cls_weight: float = 0.1 + l2_weight_decay: float = 1e-4 + + +@dataclasses.dataclass +class MaskFormer(hyperparams.Config): + # TODO update these for maskformer + """MaskFormer model definations.""" + num_queries: int = 100 + hidden_size: int = 256 + # TODO: Actually there are 133 classes for panoptic 
segmentation + num_classes: int = 133 # 0: background + num_encoder_layers: int = 6 + num_decoder_layers: int = 6 + input_size: List[int] = dataclasses.field(default_factory=list) + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet(model_id=50, bn_trainable=False)) + norm_activation: common.NormActivation = common.NormActivation() + backbone_endpoint_name: str = '5' + + +@dataclasses.dataclass +class MaskFormerTask(cfg.TaskConfig): + model: MaskFormer = MaskFormer() + train_data: cfg.DataConfig = cfg.DataConfig() + validation_data: cfg.DataConfig = cfg.DataConfig() + losses: Losses = Losses() + init_checkpoint: Optional[str] = None + init_checkpoint_modules: Union[str, List[str]] = 'all' # all, backbone + annotation_file: Optional[str] = None + per_category_metrics: bool = False + +# TODO : we should pass this via cmd +# COCO_INPUT_PATH_BASE = '/depot/davisjam/data/vishal/datasets/coco/' +COCO_INPUT_PATH_BASE = 'gs://cam2-datasets/coco_panoptic/' +COCO_TRAIN_EXAMPLES = 118287 +COCO_VAL_EXAMPLES = 5000 + + +@exp_factory.register_config_factory('maskformer_coco_panoptic') +def maskformer_coco_panoptic() -> cfg.ExperimentConfig: + """Config to get results that matches the paper.""" + train_batch_size = 8 + eval_batch_size = 8 + steps_per_epoch = 100 +# steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + train_steps = 300 * steps_per_epoch # 300 epochs + decay_at = train_steps - 100 * steps_per_epoch # 200 epochs + config = cfg.ExperimentConfig( + task=MaskFormerTask( + init_checkpoint='', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(COCO_INPUT_PATH_BASE,'annotations' + 'instances_train2017.json'), + model=MaskFormer( + input_size=[640, 640, 3], + norm_activation=common.NormActivation()), + losses=Losses(), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/train*'), + is_training=True, + global_batch_size=train_batch_size, + shuffle_buffer_size=1000, + parser = Parser( + output_size = [640,640], + min_scale = 0.3, + aspect_ratio_range = (0.5, 2.0), + min_overlap_params = (0.0, 1.4, 0.2, 0.1), + max_retry = 50, + pad_output = False, + resize_eval_groundtruth = True, + groundtruth_padded_size = None, + ignore_label = 0, + aug_rand_hflip = True, + aug_scale_min = 1.0, + aug_scale_max = 1.0, + color_aug_ssd = False, + brightness = 0.2, + saturation = 0.3, + contrast = 0.5, + # TODO choose appropriate augmentation + aug_type = None, + sigma = 8.0, + small_instance_area_threshold = 4096, + small_instance_weight = 3.0, + dtype = 'bfloat16', + seed = 2045, + ) + ), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/val*'), + is_training=False, + global_batch_size=eval_batch_size, + drop_remainder=False, + parser = Parser( + output_size = [640,640], + pad_output = True, + seed = 4096, + ) + + )), + trainer=cfg.TrainerConfig( + train_steps=train_steps, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + validation_interval= 5 * steps_per_epoch, + max_to_keep=1, + best_checkpoint_export_subdir='best_ckpt', + # TODO: Not defined the metric + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'detr_adamw', + 'detr_adamw': { + 'weight_decay_rate': 1e-4, + 'global_clipnorm': 0.1, + # Avoid AdamW legacy behavior. 
+ 'gradient_clip_norm': 0.0 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [decay_at], + 'values': [0.0001, 1.0e-05] + } + }, + })), + restrictions=[ + 'task.train_data.is_training != None', + ]) + return config diff --git a/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc b/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc index a87ae537..eab1e3d1 100644 Binary files a/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc and b/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/configs/maskformer.py b/models/official/projects/maskformer/configs/maskformer.py index 7844e0b8..024edf59 100644 --- a/models/official/projects/maskformer/configs/maskformer.py +++ b/models/official/projects/maskformer/configs/maskformer.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""DETR configurations.""" +"""MaskFormer configurations.""" import dataclasses import os @@ -21,10 +21,9 @@ from official.core import config_definitions as cfg from official.core import exp_factory from official.modeling import hyperparams -# from official.projects.detr import optimization -# from official.projects.detr.dataloaders import coco from official.vision.configs import backbones from official.vision.configs import common +from official.projects.maskformer import optimization @dataclasses.dataclass @@ -62,6 +61,7 @@ class DataConfig(cfg.DataConfig): global_batch_size: int = 0 is_training: bool = False regenerate_source_id: bool = False + # TODO : Change the dtype to bloat16 for TPU training dtype: str = 'bfloat16' decoder: common.DataDecoder = common.DataDecoder() shuffle_buffer_size: int = 10000 @@ -79,6 +79,10 @@ class Losses(hyperparams.Config): lambda_giou: float = 2.0 background_cls_weight: float = 0.1 l2_weight_decay: float = 1e-4 + cost_class = 1.0 + cost_dice = 1.0 + cost_focal = 20.0 + no_object_weight = .1 @dataclasses.dataclass @@ -87,8 +91,10 @@ class MaskFormer(hyperparams.Config): """MaskFormer model definations.""" num_queries: int = 100 hidden_size: int = 256 - num_classes: int = 91 # 0: background - num_encoder_layers: int = 6 + # TODO: Actually there are 133 classes for panoptic segmentation + num_classes: int = 133 # 0: background + fpn_encoder_layers: int = 6 + detr_encoder_layers: int = 0 num_decoder_layers: int = 6 input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone( @@ -108,7 +114,9 @@ class MaskFormerTask(cfg.TaskConfig): annotation_file: Optional[str] = None per_category_metrics: bool = False -COCO_INPUT_PATH_BASE = '/depot/davisjam/data/vishal/datasets/coco' +# TODO : we should pass this via cmd +# COCO_INPUT_PATH_BASE = '/depot/davisjam/data/vishal/datasets/coco/' +COCO_INPUT_PATH_BASE = 'gs://cam2-datasets/coco_panoptic/' COCO_TRAIN_EXAMPLES = 118287 COCO_VAL_EXAMPLES = 5000 @@ -116,32 +124,64 @@ class MaskFormerTask(cfg.TaskConfig): @exp_factory.register_config_factory('maskformer_coco_panoptic') def maskformer_coco_panoptic() -> cfg.ExperimentConfig: """Config to get results that matches the paper.""" - train_batch_size = 64 - eval_batch_size = 64 - steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + train_batch_size = 8 + eval_batch_size = 8 + steps_per_epoch = 100 +# steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size train_steps 
= 300 * steps_per_epoch # 300 epochs decay_at = train_steps - 100 * steps_per_epoch # 200 epochs config = cfg.ExperimentConfig( task=MaskFormerTask( init_checkpoint='', init_checkpoint_modules='backbone', - annotation_file=os.path.join(COCO_INPUT_PATH_BASE, - 'instances_val2017.json'), + annotation_file=os.path.join(COCO_INPUT_PATH_BASE,'annotations' + 'instances_train2017.json'), model=MaskFormer( - input_size=[1333, 1333, 3], + input_size=[640, 640, 3], norm_activation=common.NormActivation()), losses=Losses(), train_data=DataConfig( - input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/train*'), is_training=True, global_batch_size=train_batch_size, shuffle_buffer_size=1000, + parser = Parser( + output_size = [640,640], + min_scale = 0.3, + aspect_ratio_range = (0.5, 2.0), + min_overlap_params = (0.0, 1.4, 0.2, 0.1), + max_retry = 50, + pad_output = False, + resize_eval_groundtruth = True, + groundtruth_padded_size = None, + ignore_label = 0, + aug_rand_hflip = True, + aug_scale_min = 1.0, + aug_scale_max = 1.0, + color_aug_ssd = False, + brightness = 0.2, + saturation = 0.3, + contrast = 0.5, + # TODO choose appropriate augmentation + aug_type = None, + sigma = 8.0, + small_instance_area_threshold = 4096, + small_instance_weight = 3.0, + dtype = 'bfloat16', + seed = 2045, + ) ), validation_data=DataConfig( - input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/val*'), is_training=False, global_batch_size=eval_batch_size, drop_remainder=False, + parser = Parser( + output_size = [640,640], + pad_output = True, + seed = 4096, + ) + )), trainer=cfg.TrainerConfig( train_steps=train_steps, @@ -149,10 +189,10 @@ def maskformer_coco_panoptic() -> cfg.ExperimentConfig: steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, - validation_interval=5 * steps_per_epoch, + validation_interval= 5 * steps_per_epoch, max_to_keep=1, best_checkpoint_export_subdir='best_ckpt', - best_checkpoint_eval_metric='AP', + # TODO: Not defined the metric optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'detr_adamw', diff --git a/models/official/projects/maskformer/data/create_coco_tf_record.py b/models/official/projects/maskformer/data/create_coco_tf_record.py new file mode 100644 index 00000000..07ca51f3 --- /dev/null +++ b/models/official/projects/maskformer/data/create_coco_tf_record.py @@ -0,0 +1,742 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Convert raw COCO dataset to TFRecord format. + +This scripts follows the label map decoder format and supports detection +boxes, instance masks and captions. 
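+It can also write COCO panoptic category and instance masks when
+--include_panoptic_masks, --panoptic_annotations_file and --panoptic_masks_dir
+are supplied (see the panoptic flags defined below).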
+ +Example usage: + python create_coco_tf_record.py --logtostderr \ + --image_dir="${TRAIN_IMAGE_DIR}" \ + --image_info_file="${TRAIN_IMAGE_INFO_FILE}" \ + --object_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ + --caption_annotations_file="${CAPTION_ANNOTATIONS_FILE}" \ + --output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \ + --num_shards=100 +""" +import collections +import json +import logging +import os +from absl import app # pylint:disable=unused-import +from absl import flags +import numpy as np + +from pycocotools import mask +import tensorflow as tf +from tqdm import tqdm +import multiprocessing as mp +from official.vision.data import tfrecord_lib + + +flags.DEFINE_boolean( + 'include_masks', False, 'Whether to include instance segmentations masks ' + '(PNG encoded) in the result. default: False.') +flags.DEFINE_multi_string('image_dir', '', 'Directory containing images.') +flags.DEFINE_string( + 'image_info_file', '', 'File containing image information. ' + 'Tf Examples in the output files correspond to the image ' + 'info entries in this file. If this file is not provided ' + 'object_annotations_file is used if present. Otherwise, ' + 'caption_annotations_file is used to get image info.') +flags.DEFINE_string( + 'object_annotations_file', '', 'File containing object ' + 'annotations - boxes and instance masks.') +flags.DEFINE_string('caption_annotations_file', '', 'File containing image ' + 'captions.') +flags.DEFINE_string('panoptic_annotations_file', '', 'File containing panoptic ' + 'annotations.') +flags.DEFINE_string('panoptic_masks_dir', '', + 'Directory containing panoptic masks annotations.') +flags.DEFINE_boolean( + 'include_panoptic_masks', False, 'Whether to include category and ' + 'instance masks in the result. These are required to run the PQ evaluator ' + 'default: False.') +flags.DEFINE_boolean( + 'panoptic_skip_crowd', False, 'Whether to skip crowd or not for panoptic ' + 'annotations. default: False.') +flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file') +flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.') +_NUM_PROCESSES = flags.DEFINE_integer( + 'num_processes', None, + ('Number of parallel processes to use. 
' + 'If set to 0, disables multi-processing.')) + + +FLAGS = flags.FLAGS + +logger = tf.get_logger() +logger.setLevel(logging.INFO) + +_VOID_LABEL = 0 +_VOID_INSTANCE_ID = 0 +_THING_CLASS_ID = 1 +_STUFF_CLASSES_OFFSET = 90 + +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, 
"name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 
195], "isthing": 0, "id": 144, "name": "platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +def coco_segmentation_to_mask_png(segmentation, height, width, is_crowd): + """Encode a COCO mask segmentation as PNG string.""" + run_len_encoding = mask.frPyObjects(segmentation, height, width) + binary_mask = mask.decode(run_len_encoding) + if not is_crowd: + binary_mask = np.amax(binary_mask, axis=2) + + return tfrecord_lib.encode_mask_as_png(binary_mask) + + +def generate_coco_panoptics_masks(segments_info, mask_path, + include_panoptic_masks, + is_category_thing): + """Creates masks for panoptic segmentation task. + + Args: + segments_info: a list of dicts, where each dict has keys: [u'id', + u'category_id', u'area', u'bbox', u'iscrowd'], detailing information for + each segment in the panoptic mask. 
+ mask_path: path to the panoptic mask. + include_panoptic_masks: bool, when set to True, category and instance + masks are included in the outputs. Set this to True, when using + the Panoptic Quality evaluator. + is_category_thing: a dict with category ids as keys and, 0/1 as values to + represent "stuff" and "things" classes respectively. + + Returns: + A dict with keys: [u'semantic_segmentation_mask', u'category_mask', + u'instance_mask']. The dict contains 'category_mask' and 'instance_mask' + only if `include_panoptic_eval_masks` is set to True. + """ + rgb_mask = tfrecord_lib.read_image(mask_path) + r, g, b = np.split(rgb_mask, 3, axis=-1) + + # decode rgb encoded panoptic mask to get segments ids + # refer https://cocodataset.org/#format-data + segments_encoded_mask = (r + g * 256 + b * (256**2)).squeeze() + + # create contiguous ids for segments + _meta = {} + + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + else: + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + _meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + _meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + # All required masks + semantic_segmentation_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_LABEL + if include_panoptic_masks: + category_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_LABEL + instance_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_INSTANCE_ID + contiguous_id_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_INSTANCE_ID + + class_ids = [] + + for idx, segment in enumerate(segments_info): + segment_id = segment['id'] + category_id = segment['category_id'] + is_crowd = segment['iscrowd'] + + if category_id in _meta["thing_dataset_id_to_contiguous_id"]: + contiguous_id = _meta["thing_dataset_id_to_contiguous_id"][category_id] + else: + contiguous_id = _meta["stuff_dataset_id_to_contiguous_id"][category_id] + + if FLAGS.panoptic_skip_crowd and is_crowd: + continue + + if is_category_thing[category_id]: + # This for thing + encoded_category_id = _THING_CLASS_ID + instance_id = idx + 1 + else: + # This is for stuff (for stuff no instance id) + encoded_category_id = category_id - _STUFF_CLASSES_OFFSET + instance_id = _VOID_INSTANCE_ID + + segment_mask = (segments_encoded_mask == segment_id) + + semantic_segmentation_mask[segment_mask] = encoded_category_id + contiguous_id_mask[segment_mask] = contiguous_id + if include_panoptic_masks: + category_mask[segment_mask] = category_id + instance_mask[segment_mask] = instance_id + if not is_crowd: + class_ids.append(contiguous_id) + + + + outputs = { + 'semantic_segmentation_mask': tfrecord_lib.encode_mask_as_png( + semantic_segmentation_mask) + } + print("class_ids_raw : ", class_ids) + if include_panoptic_masks: + outputs.update({ + 'category_mask': tfrecord_lib.encode_mask_as_png(category_mask), + 'instance_mask': tfrecord_lib.encode_mask_as_png(instance_mask), + 'class_ids': class_ids, + 'contiguous_id_mask': tfrecord_lib.encode_mask_as_png(contiguous_id_mask), + }) + return outputs + + +def coco_annotations_to_lists(bbox_annotations, id_to_name_map, + image_height, image_width, include_masks): + """Converts COCO annotations to feature lists.""" + + data = dict((k, list()) for k in + ['xmin', 'xmax', 'ymin', 'ymax', 'is_crowd', + 'category_id', 'category_names', 
'area']) + if include_masks: + data['encoded_mask_png'] = [] + + num_annotations_skipped = 0 + + for object_annotations in bbox_annotations: + (x, y, width, height) = tuple(object_annotations['bbox']) + + if width <= 0 or height <= 0: + num_annotations_skipped += 1 + continue + if x + width > image_width or y + height > image_height: + num_annotations_skipped += 1 + continue + data['xmin'].append(float(x) / image_width) + data['xmax'].append(float(x + width) / image_width) + data['ymin'].append(float(y) / image_height) + data['ymax'].append(float(y + height) / image_height) + data['is_crowd'].append(object_annotations['iscrowd']) + category_id = int(object_annotations['category_id']) + data['category_id'].append(category_id) + data['category_names'].append(id_to_name_map[category_id].encode('utf8')) + data['area'].append(object_annotations['area']) + + if include_masks: + data['encoded_mask_png'].append( + coco_segmentation_to_mask_png(object_annotations['segmentation'], + image_height, image_width, + object_annotations['iscrowd']) + ) + + return data, num_annotations_skipped + + +def bbox_annotations_to_feature_dict( + bbox_annotations, image_height, image_width, id_to_name_map, include_masks): + """Convert COCO annotations to an encoded feature dict.""" + + data, num_skipped = coco_annotations_to_lists( + bbox_annotations, id_to_name_map, image_height, image_width, + include_masks) + feature_dict = {} + if len(bbox_annotations) != num_skipped: + feature_dict = { + 'image/object/bbox/xmin': tfrecord_lib.convert_to_feature(data['xmin']), + 'image/object/bbox/xmax': tfrecord_lib.convert_to_feature(data['xmax']), + 'image/object/bbox/ymin': tfrecord_lib.convert_to_feature(data['ymin']), + 'image/object/bbox/ymax': tfrecord_lib.convert_to_feature(data['ymax']), + 'image/object/class/text': tfrecord_lib.convert_to_feature( + data['category_names'] + ), + 'image/object/class/label': tfrecord_lib.convert_to_feature( + data['category_id'] + ), + 'image/object/is_crowd': tfrecord_lib.convert_to_feature( + data['is_crowd'] + ), + 'image/object/area': tfrecord_lib.convert_to_feature( + data['area'], 'float_list' + ), + } + if include_masks: + feature_dict['image/object/mask'] = tfrecord_lib.convert_to_feature( + data['encoded_mask_png'] + ) + + return feature_dict, num_skipped + + +def encode_caption_annotations(caption_annotations): + captions = [] + for caption_annotation in caption_annotations: + captions.append(caption_annotation['caption'].encode('utf8')) + + return captions + + +def create_tf_example(image, + image_dirs, + panoptic_masks_dir=None, + bbox_annotations=None, + id_to_name_map=None, + caption_annotations=None, + panoptic_annotation=None, + is_category_thing=None, + include_panoptic_masks=False, + include_masks=False): + """Converts image and annotations to a tf.Example proto. + + Args: + image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', + u'width', u'date_captured', u'flickr_url', u'id'] + image_dirs: list of directories containing the image files. + panoptic_masks_dir: `str` of the panoptic masks directory. + bbox_annotations: + list of dicts with keys: [u'segmentation', u'area', u'iscrowd', + u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box + coordinates in the official COCO dataset are given as [x, y, width, + height] tuples using absolute coordinates where x, y represent the + top-left (0-indexed) corner. 
This function converts to the format + expected by the Tensorflow Object Detection API (which is which is + [ymin, xmin, ymax, xmax] with coordinates normalized relative to image + size). + id_to_name_map: a dict mapping category IDs to string names. + caption_annotations: + list of dict with keys: [u'id', u'image_id', u'str']. + panoptic_annotation: dict with keys: [u'image_id', u'file_name', + u'segments_info']. Where the value for segments_info is a list of dicts, + with each dict containing information for a single segment in the mask. + is_category_thing: `bool`, whether it is a category thing. + include_panoptic_masks: `bool`, whether to include panoptic masks. + include_masks: Whether to include instance segmentations masks + (PNG encoded) in the result. default: False. + + Returns: + example: The converted tf.Example + num_annotations_skipped: Number of (invalid) annotations that were ignored. + + Raises: + ValueError: if the image pointed to by data['filename'] is not a valid JPEG, + does not exist, or is not unique across image directories. + """ + image_height = image['height'] + image_width = image['width'] + filename = image['file_name'] + image_id = image['id'] + + if len(image_dirs) > 1: + full_paths = [os.path.join(image_dir, filename) for image_dir in image_dirs] + full_existing_paths = [p for p in full_paths if tf.io.gfile.exists(p)] + if not full_existing_paths: + raise ValueError( + '{} does not exist across image directories.'.format(filename)) + if len(full_existing_paths) > 1: + raise ValueError( + '{} is not unique across image directories'.format(filename)) + full_path, = full_existing_paths + # If there is only one image directory, it's not worth checking for existence, + # since trying to open the file will raise an informative error message if it + # does not exist. 
+ else: + image_dir, = image_dirs + full_path = os.path.join(image_dir, filename) + + with tf.io.gfile.GFile(full_path, 'rb') as fid: + encoded_jpg = fid.read() + + feature_dict = tfrecord_lib.image_info_to_feature_dict( + image_height, image_width, filename, image_id, encoded_jpg, 'jpg') + + num_annotations_skipped = 0 + if bbox_annotations: + box_feature_dict, num_skipped = bbox_annotations_to_feature_dict( + bbox_annotations, image_height, image_width, id_to_name_map, + include_masks) + num_annotations_skipped += num_skipped + feature_dict.update(box_feature_dict) + + if caption_annotations: + encoded_captions = encode_caption_annotations(caption_annotations) + feature_dict.update( + {'image/caption': tfrecord_lib.convert_to_feature(encoded_captions)}) + + if panoptic_annotation: + segments_info = panoptic_annotation['segments_info'] + + panoptic_mask_filename = os.path.join( + panoptic_masks_dir, + panoptic_annotation['file_name']) + encoded_panoptic_masks = generate_coco_panoptics_masks( + segments_info, panoptic_mask_filename, include_panoptic_masks, + is_category_thing) + feature_dict.update( + {'image/segmentation/class/encoded': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['semantic_segmentation_mask'])}) + print("Encoded panoptic class ids :", encoded_panoptic_masks['class_ids']) + if include_panoptic_masks: + feature_dict.update({ + 'image/panoptic/category_mask': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['category_mask']), + 'image/panoptic/instance_mask': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['instance_mask']), + 'image/panoptic/class_ids': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['class_ids'], value_type="int64_list"), + 'image/panoptic/contiguous_mask': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['contiguous_id_mask']) + }) + + example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) + return example, num_annotations_skipped + + +def _load_object_annotations(object_annotations_file): + """Loads object annotation JSON file.""" + with tf.io.gfile.GFile(object_annotations_file, 'r') as fid: + obj_annotations = json.load(fid) + + images = obj_annotations['images'] + id_to_name_map = dict((element['id'], element['name']) for element in + obj_annotations['categories']) + + img_to_obj_annotation = collections.defaultdict(list) + logging.info('Building bounding box index.') + for annotation in obj_annotations['annotations']: + image_id = annotation['image_id'] + img_to_obj_annotation[image_id].append(annotation) + + missing_annotation_count = 0 + for image in images: + image_id = image['id'] + if image_id not in img_to_obj_annotation: + missing_annotation_count += 1 + + logging.info('%d images are missing bboxes.', missing_annotation_count) + + return img_to_obj_annotation, id_to_name_map + + +def _load_caption_annotations(caption_annotations_file): + """Loads caption annotation JSON file.""" + with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid: + caption_annotations = json.load(fid) + + img_to_caption_annotation = collections.defaultdict(list) + logging.info('Building caption index.') + for annotation in caption_annotations['annotations']: + image_id = annotation['image_id'] + img_to_caption_annotation[image_id].append(annotation) + + missing_annotation_count = 0 + images = caption_annotations['images'] + for image in images: + image_id = image['id'] + if image_id not in img_to_caption_annotation: + missing_annotation_count += 1 + + logging.info('%d images are missing 
captions.', missing_annotation_count) + + return img_to_caption_annotation + + +def _load_panoptic_annotations(panoptic_annotations_file): + """Loads panoptic annotation from file.""" + with tf.io.gfile.GFile(panoptic_annotations_file, 'r') as fid: + panoptic_annotations = json.load(fid) + + img_to_panoptic_annotation = dict() + logging.info('Building panoptic index.') + for annotation in panoptic_annotations['annotations']: + image_id = annotation['image_id'] + img_to_panoptic_annotation[image_id] = annotation + + is_category_thing = dict() + for category_info in panoptic_annotations['categories']: + is_category_thing[category_info['id']] = category_info['isthing'] == 1 + + missing_annotation_count = 0 + images = panoptic_annotations['images'] + for image in images: + image_id = image['id'] + if image_id not in img_to_panoptic_annotation: + missing_annotation_count += 1 + logging.info( + '%d images are missing panoptic annotations.', missing_annotation_count) + + return img_to_panoptic_annotation, is_category_thing + + +def _load_images_info(images_info_file): + with tf.io.gfile.GFile(images_info_file, 'r') as fid: + info_dict = json.load(fid) + return info_dict['images'] + + +def generate_annotations(images, image_dirs, + panoptic_masks_dir=None, + img_to_obj_annotation=None, + img_to_caption_annotation=None, + img_to_panoptic_annotation=None, + is_category_thing=None, + id_to_name_map=None, + include_panoptic_masks=False, + include_masks=False): + """Generator for COCO annotations.""" + + for image in images: + object_annotation = (img_to_obj_annotation.get(image['id'], None) if + img_to_obj_annotation else None) + + caption_annotaion = (img_to_caption_annotation.get(image['id'], None) if + img_to_caption_annotation else None) + + panoptic_annotation = (img_to_panoptic_annotation.get(image['id'], None) if + img_to_panoptic_annotation else None) + yield (image, image_dirs, panoptic_masks_dir, object_annotation, + id_to_name_map, caption_annotaion, panoptic_annotation, + is_category_thing, include_panoptic_masks, include_masks) + + +def _create_tf_record_from_coco_annotations(images_info_file, + image_dirs, + output_path, + num_shards, + object_annotations_file=None, + caption_annotations_file=None, + panoptic_masks_dir=None, + panoptic_annotations_file=None, + include_panoptic_masks=False, + include_masks=False): + """Loads COCO annotation json files and converts to tf.Record format. + + Args: + images_info_file: JSON file containing image info. The number of tf.Examples + in the output tf Record files is exactly equal to the number of image info + entries in this file. This can be any of train/val/test annotation json + files Eg. 'image_info_test-dev2017.json', + 'instance_annotations_train2017.json', + 'caption_annotations_train2017.json', etc. + image_dirs: List of directories containing the image files. + output_path: Path to output tf.Record file. + num_shards: Number of output files to create. + object_annotations_file: JSON file containing bounding box annotations. + caption_annotations_file: JSON file containing caption annotations. + panoptic_masks_dir: Directory containing panoptic masks. + panoptic_annotations_file: JSON file containing panoptic annotations. + include_panoptic_masks: Whether to include 'category_mask' + and 'instance_mask', which is required by the panoptic quality evaluator. + include_masks: Whether to include instance segmentations masks + (PNG encoded) in the result. default: False. 
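+
+  The examples are written as `num_shards` sharded TFRecord files under the
+  `output_path` prefix via tfrecord_lib.write_tf_record_dataset.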
+ """ + + logging.info('writing to output path: %s', output_path) + + images = _load_images_info(images_info_file) + + img_to_obj_annotation = None + img_to_caption_annotation = None + id_to_name_map = None + img_to_panoptic_annotation = None + is_category_thing = None + if object_annotations_file: + img_to_obj_annotation, id_to_name_map = ( + _load_object_annotations(object_annotations_file)) + if caption_annotations_file: + img_to_caption_annotation = ( + _load_caption_annotations(caption_annotations_file)) + if panoptic_annotations_file: + img_to_panoptic_annotation, is_category_thing = ( + _load_panoptic_annotations(panoptic_annotations_file)) + + coco_annotations_iter = generate_annotations( + images=images, + image_dirs=image_dirs, + panoptic_masks_dir=panoptic_masks_dir, + img_to_obj_annotation=img_to_obj_annotation, + img_to_caption_annotation=img_to_caption_annotation, + img_to_panoptic_annotation=img_to_panoptic_annotation, + is_category_thing=is_category_thing, + id_to_name_map=id_to_name_map, + include_panoptic_masks=include_panoptic_masks, + include_masks=include_masks) + + num_skipped = tfrecord_lib.write_tf_record_dataset( + output_path, coco_annotations_iter, create_tf_example, num_shards, + multiple_processes=_NUM_PROCESSES.value) + + logging.info('Finished writing, skipped %d annotations.', num_skipped) + + +def main(_): + assert FLAGS.image_dir, '`image_dir` missing.' + assert (FLAGS.image_info_file or FLAGS.object_annotations_file or + FLAGS.caption_annotations_file), ('All annotation files are ' + 'missing.') + if FLAGS.image_info_file: + images_info_file = FLAGS.image_info_file + elif FLAGS.object_annotations_file: + images_info_file = FLAGS.object_annotations_file + else: + images_info_file = FLAGS.caption_annotations_file + + directory = os.path.dirname(FLAGS.output_file_prefix) + if not tf.io.gfile.isdir(directory): + tf.io.gfile.makedirs(directory) + + _create_tf_record_from_coco_annotations(images_info_file, FLAGS.image_dir, + FLAGS.output_file_prefix, + FLAGS.num_shards, + FLAGS.object_annotations_file, + FLAGS.caption_annotations_file, + FLAGS.panoptic_masks_dir, + FLAGS.panoptic_annotations_file, + FLAGS.include_panoptic_masks, + FLAGS.include_masks) + + +if __name__ == '__main__': + app.run(main) diff --git a/models/official/projects/maskformer/data/create_tf_records.sh b/models/official/projects/maskformer/data/create_tf_records.sh index 2dd62a93..d98d8906 100755 --- a/models/official/projects/maskformer/data/create_tf_records.sh +++ b/models/official/projects/maskformer/data/create_tf_records.sh @@ -1,9 +1,9 @@ -DATA_DIR=$1 +# DATA_DIR=$1 # wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -P $DATA_DIR # wget http://images.cocodataset.org/zips/val2017.zip -P $DATA_DIR # wget http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip -P $DATA_DIR # wget http://images.cocodataset.org/zips/train2017.zip -P $DATA_DIR -http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip + # downloaded_panoptic=true # run_dir=$(pwd) # if ! 
cd "$DATA_DIR"; then @@ -35,7 +35,7 @@ http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip # else # $($GET_TRAIN) # fi -cd /depot/qqiu/data/vishal/projects/tf_maskformer_integration/ +# # cd /depot/qqiu/data/vishal/projects/tf-maskformer/models # unzip $DATA_DIR/"*".zip -d $DATA_DIR # mkdir $DATA_DIR/zips @@ -46,7 +46,7 @@ cd /depot/qqiu/data/vishal/projects/tf_maskformer_integration/ # unzip $DATA_DIR/annotations/panoptic_val2017.zip -d $DATA_DIR # fi -python3 official/vision/data/create_coco_tf_record.py \ +python3 create_coco_tf_record.py \ --logtostderr \ --image_dir="$DATA_DIR/val2017" \ --object_annotations_file="$DATA_DIR/annotations/instances_val2017.json" \ @@ -54,16 +54,15 @@ python3 official/vision/data/create_coco_tf_record.py \ --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_val2017.json" \ --panoptic_masks_dir="$DATA_DIR/panoptic_val2017" \ --num_shards=8 \ - --include_masks \ --include_panoptic_masks -python3 official/vision/data/create_coco_tf_record.py \ - --logtostderr \ - --image_dir="$DATA_DIR/train2017" \ - --object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \ - --output_file_prefix="$DATA_DIR/tfrecords/train" \ - --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \ - --panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \ - --num_shards=150 \ - --include_masks \ - --include_panoptic_masks +# python3 create_coco_tf_record.py \ +# --logtostderr \ +# --image_dir="$DATA_DIR/train2017" \ +# --object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \ +# --output_file_prefix="$DATA_DIR/tfrecords/train" \ +# --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \ +# --panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \ +# --num_shards=150 \ +# --include_panoptic_masks\ +# --num_processes 16\ diff --git a/models/official/projects/maskformer/data/wget-log b/models/official/projects/maskformer/data/wget-log index b2a28873..26e4ae51 100644 --- a/models/official/projects/maskformer/data/wget-log +++ b/models/official/projects/maskformer/data/wget-log @@ -1,11 +1,11 @@ ---2023-03-23 20:30:42-- http://images.cocodataset.org/zips/val2017.zip -Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.226.97, 3.5.1.162, 52.217.224.49, ... -Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.226.97|:80... connected. +--2023-06-18 01:02:28-- http://images.cocodataset.org/annotations/annotations_trainval2017.zip +Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.49.41, 52.217.234.249, 3.5.25.137, ... +Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.49.41|:80... connected. HTTP request sent, awaiting response... 
200 OK -Length: 815585330 (778M) [application/zip] -Saving to: ‘/depot/davisjam/data/vishal/datasets/coco/val2017.zip’ +Length: 252907541 (241M) [application/zip] +Saving to: ‘/depot/davisjam/data/vishal/datasets/coco/annotations_trainval2017.zip’ - 58% [==============================================================> ] 480,496,948 68.0MB/s eta 6s 60% [===============================================================> ] 494,895,868 68.0MB/s eta 5s 62% [=================================================================> ] 509,051,732 68.0MB/s eta 5s 64% [===================================================================> ] 523,434,268 68.3MB/s eta 5s 65% [=====================================================================> ] 537,768,328 68.4MB/s eta 5s 67% [=======================================================================> ] 552,232,108 68.5MB/s eta 5s 69% [=========================================================================> ] 566,760,748 68.4MB/s eta 4s 71% [===========================================================================> ] 581,224,528 68.5MB/s eta 4s 73% [=============================================================================> ] 595,631,640 68.5MB/s eta 4s 74% [===============================================================================> ] 610,022,368 68.6MB/s eta 4s 76% [================================================================================> ] 624,226,708 68.4MB/s eta 4s 78% [==================================================================================> ] 638,495,908 68.4MB/s eta 3s 80% [====================================================================================> ] 652,765,108 68.4MB/s eta 3s 81% [======================================================================================> ] 667,293,748 68.5MB/s eta 3s 83% [========================================================================================> ] 681,845,448 68.5MB/s eta 3s 85% [==========================================================================================> ] 696,415,888 68.5MB/s eta 3s 87% [============================================================================================> ] 710,879,668 68.6MB/s eta 1s 88% [==============================================================================================> ] 725,278,588 68.5MB/s eta 1s 90% [================================================================================================> ] 739,462,316 68.5MB/s eta 1s 92% [=================================================================================================> ] 753,752,128 68.4MB/s eta 1s 94% [===================================================================================================> ] 768,086,188 68.5MB/s eta 1s 95% [=====================================================================================================> ] 782,614,828 68.4MB/s eta 0s 97% [=======================================================================================================> ] 797,143,468 68.5MB/s eta 0s 99% [=========================================================================================================> ] 811,477,528 68.5MB/s eta 0s 100%[==========================================================================================================>] 815,585,330 68.4MB/s in 12s + 91% [==========================================================================================================================> ] 232,219,579 34.9MB/s eta 2s 94% 
[==============================================================================================================================> ] 240,023,479 35.0MB/s eta 2s 97% [==================================================================================================================================> ] 247,806,679 35.1MB/s eta 0s 100%[=====================================================================================================================================>] 252,907,541 35.3MB/s in 8.6s -2023-03-23 20:30:54 (67.6 MB/s) - ‘/depot/davisjam/data/vishal/datasets/coco/val2017.zip’ saved [815585330/815585330] +2023-06-18 01:02:37 (28.0 MB/s) - ‘/depot/davisjam/data/vishal/datasets/coco/annotations_trainval2017.zip’ saved [252907541/252907541] diff --git a/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc b/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc index 289843bc..367229f8 100644 Binary files a/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc and b/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc b/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc index b763aa0b..ff75046c 100644 Binary files a/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc and b/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/dataloaders/factory.py b/models/official/projects/maskformer/dataloaders/factory.py index 187d54f5..0389e273 100644 --- a/models/official/projects/maskformer/dataloaders/factory.py +++ b/models/official/projects/maskformer/dataloaders/factory.py @@ -54,4 +54,4 @@ def parser_generator(params, mode): else: raise ValueError('Parser %s is not supported.' % params.architecture.parser) - return parser_fn + return parser_fn \ No newline at end of file diff --git a/models/official/projects/maskformer/dataloaders/input_reader.py b/models/official/projects/maskformer/dataloaders/input_reader.py index 8bd35c0d..2da163c0 100644 --- a/models/official/projects/maskformer/dataloaders/input_reader.py +++ b/models/official/projects/maskformer/dataloaders/input_reader.py @@ -14,27 +14,20 @@ """Data loader and input processing.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from official.core import config_definitions as cfg from typing import Optional, Text import tensorflow as tf -# import factory -# from official.projects.configs import mode_keys as ModeKeys -from official.modeling.hyperparams import params_dict class InputFn(object): """Input function that creates dataset from files.""" def __init__(self, - file_pattern: Text, - params: params_dict.ParamsDict, - mode: Text, - batch_size: int, + params: cfg.DataConfig, + dataset_fn, + parser_fn, num_examples: Optional[int] = -1): """Initialize. - Args: file_pattern: the file pattern for the data example (TFRecords). params: the parameter object for constructing example parser and model. @@ -44,16 +37,18 @@ def __init__(self, tf.errors.OutOfRangeError after that. If non-positive, it will be ignored. 
""" - assert file_pattern is not None - assert mode is not None - assert batch_size is not None - self._file_pattern = file_pattern - self._mode = mode - self._is_training = (mode == ModeKeys.TRAIN) - self._batch_size = batch_size + self._is_training = params.is_training + self._file_pattern = params.input_path + + self._batch_size = params.global_batch_size + self._shuffle_buffer_size = params.shuffle_buffer_size self._num_examples = num_examples - self._parser_fn = factory.parser_generator(params, mode) - self._dataset_fn = tf.data.TFRecordDataset + self._parser_fn = parser_fn + + self._dataset_fn = dataset_fn + if dataset_fn is None: + + self._dataset_fn = tf.data.TFRecordDataset self._input_sharding = (not self._is_training) try: @@ -66,24 +61,23 @@ def __init__(self, def __call__(self, ctx=None, batch_size: int = None): """Provides tf.data.Dataset object. - Args: ctx: context object. batch_size: expected batch size input data. - Returns: tf.data.Dataset object. """ if not batch_size: batch_size = self._batch_size assert batch_size is not None - dataset = tf.data.Dataset.list_files( - self._file_pattern, shuffle=self._is_training) - + dataset = tf.data.Dataset.list_files(self._file_pattern, + shuffle=self._is_training) + + if self._input_sharding and ctx and ctx.num_input_pipelines > 1: dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id) dataset = dataset.cache() - + if self._is_training: dataset = dataset.repeat() @@ -91,15 +85,17 @@ def __call__(self, ctx=None, batch_size: int = None): map_func=self._dataset_fn, cycle_length=32, num_parallel_calls=tf.data.experimental.AUTOTUNE) - + if self._is_training: - dataset = dataset.shuffle(1000) + dataset = dataset.shuffle(self._shuffle_buffer_size) if self._num_examples > 0: dataset = dataset.take(self._num_examples) - + # Parses the fetched records to input tensors for model function. 
+ dataset = dataset.map( self._parser_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.batch(batch_size, drop_remainder=True) dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset + return dataset \ No newline at end of file diff --git a/models/official/projects/maskformer/dataloaders/panoptic_input.py b/models/official/projects/maskformer/dataloaders/panoptic_input.py index b9524967..a9c16bcd 100644 --- a/models/official/projects/maskformer/dataloaders/panoptic_input.py +++ b/models/official/projects/maskformer/dataloaders/panoptic_input.py @@ -18,14 +18,148 @@ import numpy as np import tensorflow as tf - -from official.vision.configs import common +from loguru import logger from official.vision.dataloaders import parser from official.vision.dataloaders import tf_example_decoder from official.vision.ops import augment from official.vision.ops import preprocess_ops -from official.projects.maskformer.dataloaders import input_reader -# from official.projects.maskformer.configs import mode_keys as ModeKeys +from official.core import config_definitions as cfg +tf.compat.v1.enable_eager_execution() +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 10], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": 
"kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], 
"isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, 
"id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] def _compute_gaussian_from_std(sigma): """Computes the Gaussian and its size from a given standard deviation.""" @@ -53,31 +187,49 @@ def __init__( regenerate_source_id=regenerate_source_id) self._panoptic_category_mask_key = panoptic_category_mask_key self._panoptic_instance_mask_key = panoptic_instance_mask_key - + self._panoptic_contigious_mask_key = 'image/panoptic/contiguous_mask' + self._class_ids_key = 'image/panoptic/class_ids' + self._image_height_key = 'image/height' + self._image_width_key = 'image/width' + self._image_key = "" self._panoptic_keys_to_features = { - panoptic_category_mask_key: + self._panoptic_category_mask_key: + tf.io.FixedLenFeature((), tf.string, default_value=''), + self._panoptic_instance_mask_key: + tf.io.FixedLenFeature((), tf.string, default_value=''), + self._panoptic_contigious_mask_key: tf.io.FixedLenFeature((), tf.string, default_value=''), - panoptic_instance_mask_key: - tf.io.FixedLenFeature((), tf.string, default_value='') + self._class_ids_key: + tf.io.VarLenFeature(tf.int64), } + def decode(self, serialized_example): decoded_tensors = super(TfExampleDecoder, self).decode(serialized_example) + + parsed_tensors = tf.io.parse_single_example( serialized_example, self._panoptic_keys_to_features) - - category_mask = tf.io.decode_image( + + category_mask = tf.io.decode_png( parsed_tensors[self._panoptic_category_mask_key], channels=1) - instance_mask = tf.io.decode_image( + instance_mask = tf.io.decode_png( parsed_tensors[self._panoptic_instance_mask_key], channels=1) + contigious_mask = tf.io.decode_png( + parsed_tensors[self._panoptic_contigious_mask_key], channels=1) + class_ids = parsed_tensors[self._class_ids_key] + category_mask.set_shape([None, None, 1]) instance_mask.set_shape([None, None, 1]) - + contigious_mask.set_shape([None, None, 1]) decoded_tensors.update({ 'groundtruth_panoptic_category_mask': category_mask, - 'groundtruth_panoptic_instance_mask': instance_mask + 'groundtruth_panoptic_instance_mask': instance_mask, + 'groundtruth_panoptic_contigious_mask': contigious_mask, + 'groundtruth_panoptic_class_ids': class_ids, }) + return decoded_tensors @@ -86,29 +238,10 @@ class mask_former_parser(parser.Parser): def __init__( self, - output_size: List[int] = None, - min_scale: float = 0.3, - aspect_ratio_range: List[float] = (0.5, 2.0), - min_overlap_params: List[float] = (0.0, 1.4, 0.2, 0.1), - max_retry: int = 50, - pad_output: bool = True, - resize_eval_groundtruth: bool = True, - groundtruth_padded_size: Optional[List[int]] = None, - ignore_label: int = 0, - aug_rand_hflip: bool = True, - aug_scale_min: float = 1.0, - aug_scale_max: float = 1.0, - color_aug_ssd: bool = False, - brightness: float = 0.2, - saturation: float = 0.3, - contrast: float = 0.5, - aug_type: Optional[common.Augmentation] = None, - sigma: float = 8.0, - small_instance_area_threshold: int = 4096, - small_instance_weight: float = 3.0, - dtype: str = 'float32', - seed: int = None, - mode: ModeKeys = None): + params: cfg.DataConfig, + decoder_fn = None, + is_training = False, + ): """Initializes parameters for parsing annotations in the dataset. 
Args: @@ -136,35 +269,42 @@ def __init__( """ # general settings + self._output_size = params.output_size + self._mask_null = 0 + self._dtype = params.dtype + self._pad_output = params.pad_output + self._seed = params.seed + # TODO + self._max_instances = 100 + self._decoder = decoder_fn - self._output_size = output_size - self._dtype = dtype - self._pad_output = pad_output - self._seed = seed - - self._decoder = TfExampleDecoder() + if self._pad_output == True and self._output_size is None: + raise Exception("Error: no output pad provided") + if self._decoder == None: + print("assuming default decoder") + self._decoder = TfExampleDecoder() - self._mode = mode - if mode == None: + self._is_training = is_training + if is_training == None: print("assuming training mode") - self._mode = ModeKeys.TRAIN + self._is_training = True - # Boxes: - self._resize_eval_groundtruth = resize_eval_groundtruth - if (not resize_eval_groundtruth) and (groundtruth_padded_size is None): + + self._resize_eval_groundtruth = params.resize_eval_groundtruth + if (not params.resize_eval_groundtruth) and (params.groundtruth_padded_size is None): raise ValueError( 'groundtruth_padded_size ([height, width]) needs to be' 'specified when resize_eval_groundtruth is False.') - self._groundtruth_padded_size = groundtruth_padded_size - self._ignore_label = ignore_label + self._groundtruth_padded_size = params.groundtruth_padded_size + self._ignore_label = params.ignore_label # Data augmentation - self._aug_rand_hflip = aug_rand_hflip - self._aug_scale_min = aug_scale_min - self._aug_scale_max = aug_scale_max + self._aug_rand_hflip = params.aug_rand_hflip + self._aug_scale_min = params.aug_scale_min + self._aug_scale_max = params.aug_scale_max # Auto Augment - if aug_type and aug_type.type: + if params.aug_type and aug_type.type: if aug_type.type == 'autoaug': self._augmenter = augment.AutoAugment( augmentation_name=aug_type.autoaug.augmentation_name, @@ -177,26 +317,27 @@ def __init__( self._augmenter = None #Cropping: - self._min_scale = min_scale - self._aspect_ratio_range = aspect_ratio_range - self._min_overlap_params = min_overlap_params - self._max_retry = max_retry + self._min_scale = params.min_scale + self._aspect_ratio_range = params.aspect_ratio_range + self._min_overlap_params = params.min_overlap_params + self._max_retry = params.max_retry # color augmentation - self._color_aug_ssd = color_aug_ssd - self._brightness = brightness - self._saturation = saturation - self._contrast = contrast + self._color_aug_ssd = params.color_aug_ssd + self._brightness = params.brightness + self._saturation = params.saturation + self._contrast = params.contrast - self._sigma = sigma + self._sigma = params.sigma self._gaussian, self._gaussian_size = _compute_gaussian_from_std( self._sigma) self._gaussian = tf.reshape(self._gaussian, shape=[-1]) - self._small_instance_area_threshold = small_instance_area_threshold - self._small_instance_weight = small_instance_weight + self._small_instance_area_threshold = params.small_instance_area_threshold + self._small_instance_weight = params.small_instance_weight + def _resize_and_crop_mask(self, mask, image_info, crop_dims, is_training): """Resizes and crops mask using `image_info` dict.""" @@ -205,15 +346,15 @@ def _resize_and_crop_mask(self, mask, image_info, crop_dims, is_training): offset = image_info[3, : ] im_height = int(image_info[0][0]) im_width = int(image_info[0][1]) - print(mask.shape) - print(im_height, im_width) + + # print(mask.shape) mask = tf.reshape(mask, shape=[1, im_height, 
im_width, 1]) - print(mask.shape) + # print(mask.shape) mask += 1 if is_training or self._resize_eval_groundtruth: - print("using image offset:",offset) + # print("using image offset:",offset) mask = preprocess_ops.resize_and_crop_masks( mask, image_scale, @@ -233,7 +374,8 @@ def _resize_and_crop_mask(self, mask, image_info, crop_dims, is_training): mask) mask = tf.squeeze(mask, axis=0) return mask - + + def _parse_data(self, data, is_training): image = data['image'] @@ -249,6 +391,10 @@ def _parse_data(self, data, is_training): instance_mask = tf.cast( data['groundtruth_panoptic_instance_mask'][:, :, 0], dtype=tf.float32) + contigious_mask = tf.cast(data['groundtruth_panoptic_contigious_mask'][:, :, 0], + dtype=tf.float32) + class_ids = tf.sparse.to_dense(data['groundtruth_panoptic_class_ids'], default_value=0) + class_ids = tf.cast(class_ids, dtype=tf.float32) # applies by pixel augmentation (saturation, brightness, contrast) if self._color_aug_ssd: @@ -261,8 +407,8 @@ def _parse_data(self, data, is_training): ) # Flips image randomly during training. if self._aug_rand_hflip and is_training: - print("doing random flip") - masks = tf.stack([category_mask, instance_mask], axis=0) + # print("doing random flip") + masks = tf.stack([category_mask, instance_mask, contigious_mask], axis=0) image, _, masks = preprocess_ops.random_horizontal_flip( image=image, masks=masks, @@ -270,17 +416,12 @@ def _parse_data(self, data, is_training): category_mask = masks[0] instance_mask = masks[1] - - - + contigious_mask = masks[2] # Resize and crops image. - print(category_mask.shape) - print(instance_mask.shape) - print(self._output_size) - masks = tf.stack([category_mask, instance_mask], axis=0) - masks = tf.expand_dims(masks, -1) - print("stacked masks:",masks.shape) + masks = tf.stack([category_mask, instance_mask, contigious_mask], axis=0) + masks = tf.expand_dims(masks, -1) + # Resizes and crops image. 
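# Editor's note (not part of the diff): the category, instance and contiguous
# masks are stacked into a single tensor before random_horizontal_flip and
# random_crop_image_masks so that one random geometric transform is applied to
# all three at once, keeping them pixel-aligned with the image.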
cropped_image, masks = preprocess_ops.random_crop_image_masks( img = image, @@ -295,24 +436,19 @@ def _parse_data(self, data, is_training): category_mask = tf.squeeze(masks[0]) instance_mask = tf.squeeze(masks[1]) + contigious_mask = tf.squeeze(masks[2]) - print("categorical shape:",category_mask.shape) - print("instance shape:",instance_mask.shape) - print("image shape:",cropped_image.shape) crop_im_size = tf.cast(tf.shape(cropped_image)[0:2], tf.int32) - - print("using padding:", self._output_size) - # resize and pad image from random crop + + # Resize image image, image_info = preprocess_ops.resize_and_crop_image( cropped_image, self._output_size if self._pad_output else crop_im_size, self._output_size if self._pad_output else crop_im_size, - aug_scale_min=self._aug_scale_min if self._pad_output or not self._mode == ModeKeys.TRAIN else 1.0, - aug_scale_max=self._aug_scale_max if self._pad_output or not self._mode == ModeKeys.TRAIN else 1.0) - - print("image info:", image_info) - # resize masks according to image + aug_scale_min=self._aug_scale_min if self._pad_output or not self._is_training else 1.0, + aug_scale_max=self._aug_scale_max if self._pad_output or not self._is_training else 1.0) + category_mask = self._resize_and_crop_mask( category_mask, image_info, @@ -323,34 +459,45 @@ def _parse_data(self, data, is_training): image_info, self._output_size if self._pad_output else crop_im_size, is_training=is_training) - (instance_centers_heatmap, - instance_centers_offset, - semantic_weights) = self._encode_centers_and_offets( - instance_mask=instance_mask[:, :, 0]) + contigious_mask = self._resize_and_crop_mask( + contigious_mask, + image_info, + self._output_size if self._pad_output else crop_im_size, + is_training=is_training) + + individual_masks = self._get_individual_masks( + class_ids=class_ids,contig_instance_mask=contigious_mask) + + + # Resize image and masks to output size. 
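# Editor's note (not part of the diff): the resizes below use method='nearest'
# so that integer category/instance ids are copied rather than interpolated;
# bilinear resizing would blend neighbouring ids into meaningless label values.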
+ image = tf.image.resize(image, self._output_size, method='nearest') + category_mask = tf.image.resize(category_mask, self._output_size, method='nearest') + instance_mask = tf.image.resize(instance_mask, self._output_size, method='nearest') + individual_masks = tf.image.resize(individual_masks, self._output_size, method='nearest') - # Cast image and labels as self._dtype + unique_ids = preprocess_ops.clip_or_pad_to_fixed_size( + class_ids, self._max_instances) + image = tf.cast(image, dtype=self._dtype) category_mask = tf.cast(category_mask, dtype=self._dtype) instance_mask = tf.cast(instance_mask, dtype=self._dtype) - instance_centers_heatmap = tf.cast( - instance_centers_heatmap, dtype=self._dtype) - instance_centers_offset = tf.cast( - instance_centers_offset, dtype=self._dtype) + individual_masks = tf.cast(individual_masks, dtype=self._dtype) + unique_ids = tf.cast(unique_ids, dtype=self._dtype) valid_mask = tf.not_equal( category_mask, self._ignore_label) things_mask = tf.not_equal( instance_mask, self._ignore_label) + labels = { 'category_mask': category_mask, 'instance_mask': instance_mask, - 'instance_centers_heatmap': instance_centers_heatmap, - 'instance_centers_offset': instance_centers_offset, - 'semantic_weights': semantic_weights, 'valid_mask': valid_mask, 'things_mask': things_mask, - 'image_info': image_info + 'image_info': image_info, + 'unique_ids': unique_ids, + 'individual_masks': individual_masks, } return image, labels @@ -362,112 +509,48 @@ def _parse_eval_data(self, data): """Parses data for evaluation.""" return self._parse_data(data=data, is_training=False) - def _encode_centers_and_offets(self, instance_mask): - """Generates center heatmaps and offets from instance id mask. - Args: - instance_mask: `tf.Tensor` of shape [height, width] representing - groundtruth instance id mask. 
- Returns: - instance_centers_heatmap: `tf.Tensor` of shape [height, width, 1] - instance_centers_offset: `tf.Tensor` of shape [height, width, 2] - """ - shape = tf.shape(instance_mask) - height, width = shape[0], shape[1] - - padding_start = int(3 * self._sigma + 1) - padding_end = int(3 * self._sigma + 2) - - # padding should be equal to self._gaussian_size which is calculated - # as size = int(6 * sigma + 3) - padding = padding_start + padding_end - - instance_centers_heatmap = tf.zeros( - shape=[height + padding, width + padding], - dtype=tf.float32) - centers_offset_y = tf.zeros( - shape=[height, width], - dtype=tf.float32) - centers_offset_x = tf.zeros( - shape=[height, width], - dtype=tf.float32) - semantic_weights = tf.ones( - shape=[height, width], - dtype=tf.float32) - - unique_instance_ids, _ = tf.unique(tf.reshape(instance_mask, [-1])) - - # The following method for encoding center heatmaps and offets is inspired - # by the reference implementation available at - # https://github.com/google-research/deeplab2/blob/main/data/sample_generator.py # pylint: disable=line-too-long - for instance_id in unique_instance_ids: - if instance_id == self._ignore_label: - continue - - mask = tf.equal(instance_mask, instance_id) - mask_area = tf.reduce_sum(tf.cast(mask, dtype=tf.float32)) - mask_indices = tf.cast(tf.where(mask), dtype=tf.float32) - mask_center = tf.reduce_mean(mask_indices, axis=0) - mask_center_y = tf.cast(tf.round(mask_center[0]), dtype=tf.int32) - mask_center_x = tf.cast(tf.round(mask_center[1]), dtype=tf.int32) - - if mask_area < self._small_instance_area_threshold: - semantic_weights = tf.where( - mask, - self._small_instance_weight, - semantic_weights) - - gaussian_size = self._gaussian_size - indices_y = tf.range(mask_center_y, mask_center_y + gaussian_size) - indices_x = tf.range(mask_center_x, mask_center_x + gaussian_size) - - indices = tf.stack(tf.meshgrid(indices_y, indices_x)) - indices = tf.reshape( - indices, shape=[2, gaussian_size * gaussian_size]) - indices = tf.transpose(indices) - - instance_centers_heatmap = tf.tensor_scatter_nd_max( - tensor=instance_centers_heatmap, - indices=indices, - updates=self._gaussian) - - centers_offset_y = tf.tensor_scatter_nd_update( - tensor=centers_offset_y, - indices=tf.cast(mask_indices, dtype=tf.int32), - updates=tf.cast(mask_center_y, dtype=tf.float32) - mask_indices[:, 0]) - - centers_offset_x = tf.tensor_scatter_nd_update( - tensor=centers_offset_x, - indices=tf.cast(mask_indices, dtype=tf.int32), - updates=tf.cast(mask_center_x, dtype=tf.float32) - mask_indices[:, 1]) - - instance_centers_heatmap = instance_centers_heatmap[ - padding_start:padding_start + height, - padding_start:padding_start + width] - instance_centers_heatmap = tf.expand_dims(instance_centers_heatmap, axis=-1) - - instance_centers_offset = tf.stack( - [centers_offset_y, centers_offset_x], - axis=-1) - - return (instance_centers_heatmap, - instance_centers_offset, - semantic_weights) + def _get_individual_masks(self, class_ids, contig_instance_mask): + + individual_mask_list = tf.TensorArray(tf.float32, size=self._max_instances) + counter = 0 + + for class_id in class_ids: + mask = tf.equal(contig_instance_mask, class_id) + individual_mask_list = individual_mask_list.write(counter, tf.cast(mask, tf.float32)) + counter += 1 + + for idx in tf.range(100-tf.size(class_ids)): + new_mask = tf.zeros(tf.shape(contig_instance_mask)) + individual_mask_list = individual_mask_list.write(counter, tf.cast(new_mask, tf.float32)) + + # individual_masks = 
tf.zeros([self._max_instances, self._output_size[0], self._output_size[1], 1], dtype=tf.float32) + # unique_instance_ids, _ = tf.unique(tf.reshape(instance_mask, [-1])) + # individual_mask_list = tf.TensorArray(tf.float32, size=100) + # counter = 0 + # for instance_id in unique_instance_ids: + + # mask = tf.equal(instance_mask, instance_id) + # individual_mask_list = individual_mask_list.write(counter, tf.expand_dims(tf.cast(mask, tf.float32), axis=2)) + # counter += 1 + + # return (unique_instance_ids, individual_mask_list.stack()) + # tf.zeros([self._max_instances, self._output_size[0], self._output_size[1], 1], dtype=tf.float32) + + return individual_mask_list.stack() def __call__(self, value): """Parses data to an image and associated training labels. Args: value: a string tensor holding a serialized tf.Example proto. Returns: - image, labels: if mode == ModeKeys.TRAIN. see _parse_train_data. - {'images': image, 'labels': labels}: if mode == ModeKeys.PREDICT - or ModeKeys.PREDICT_WITH_GT. + image, labels: if is_training, see _parse_train_data. + {'images': image, 'labels': labels}: if is_training """ - with tf.name_scope('parser'): - data = self._decoder.decode(value) - - if self._mode == ModeKeys.TRAIN: + data = self._decoder(value) + + if self._is_training: return self._parse_train_data(data) else: - return self._parse_eval_data(data) \ No newline at end of file + return self._parse_eval_data(data) diff --git a/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference-checkpoint.py b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference-checkpoint.py new file mode 100644 index 00000000..6b326e13 --- /dev/null +++ b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference-checkpoint.py @@ -0,0 +1,67 @@ +import tensorflow as tf + +class PanopticInference(): + def call(self, mask_true, mask_pred): + probs = tf.keras.activations.softmax(mask_true, axis=-1) + scores = tf.reduce_max(probs, axis=-1) + labels = tf.argmax(probs, axis=-1) + mask_pred = tf.keras.activations.sigmoid(mask_pred) + + config_num_classes = 171 + object_mask_threshold = 0.0 + keep = tf.math.logical_and(tf.math.not_equal(labels, config_num_classes), scores > object_mask_threshold) + curr_scores = scores[keep] + curr_classes = labels[keep] + + curr_masks = mask_pred[keep] + curr_mask_cls = mask_true[keep] + curr_mask_cls = tf.slice(curr_mask_cls, [0, 0], [-1, curr_mask_cls.shape[1] - 1]) + + curr_prob_masks = tf.reshape(curr_scores, [-1, 1, 1]) * curr_masks + + height, width = tf.shape(curr_masks)[-2:] + + with tf.device(curr_masks.device): + panoptic_seg = tf.zeros((height, width), dtype=tf.int32) + segments_info = [] + + current_segment_id = 0 + + if tf.shape(curr_masks)[0] == 0: + return panoptic_seg, segments_info + else: + curr_masks_ids = tf.argmax(curr_prob_masks, axis=0) + stuff_memory = {} + + for k in range(curr_classes.shape[0]): + pred_class = curr_classes[k].numpy() + # is_thing = pred_class in self.metadata.thing_dataset_id_to_contiguous_id.values() + is_thing = True # TODO(ibrahim): FIX when get configs. 
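# Editor's note (not part of the diff): in the reference MaskFormer-style
# panoptic inference, is_thing would come from dataset metadata (the "isthing"
# flag in the COCO category table earlier in this diff). The stuff_memory dict
# below merges all pixels of one stuff class into a single segment id, while
# each thing instance gets a fresh segment id; with is_thing hard-coded to
# True, that merging is effectively disabled until the config is wired in.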
+ + mask = curr_masks_ids == k + mask_area = tf.reduce_sum(mask).numpy() + original_area = tf.reduce_sum(curr_masks[k] >= 0.5).numpy() + + if mask_area > 0 and original_area > 0: + config_overlap_threshold = 0.8 + if mask_area / original_area < config_overlap_threshold: + continue + + if not is_thing: + if int(pred_class) in stuff_memory: + panoptic_seg[mask] = stuff_memory[int(pred_class)] + continue + else: + stuff_memory[int(pred_class)] = current_segment_id + 1 + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + + segments_info.append({ + "id": current_segment_id, + "is_thing": bool(is_thing), + "category_id": int(pred_class), + }) + + return panoptic_seg, segments_info + diff --git a/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference_test-checkpoint.py b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference_test-checkpoint.py new file mode 100644 index 00000000..afa1e412 --- /dev/null +++ b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference_test-checkpoint.py @@ -0,0 +1,30 @@ +from absl.testing import parameterized +import tensorflow as tf + +from official.projects.maskformer.losses.inference import PanopticInference +from official.projects.maskformer.maskformer import MaskFormer + +class PanopticInferenceTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters(('test1',)) + def test_pass_through(self): + model = MaskFormer() + + input_image = tf.ones((1, 640, 640, 3)) + + expected_class_probs_shape = [1, 100, 172] + expected_mask_probs_shape = [1, 160, 160, 100] + + output = model(input_image) + self.assertAllEqual( + output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) + self.assertAllEqual( + output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) + + out = PanopticInference().call(mask_true=output["class_prob_predictions"], mask_pred=output["mask_prob_predictions"]) + print(out) + + + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/official/projects/maskformer/losses/.ipynb_checkpoints/maskformer_losses-checkpoint.py b/models/official/projects/maskformer/losses/.ipynb_checkpoints/maskformer_losses-checkpoint.py new file mode 100644 index 00000000..4bfe5edd --- /dev/null +++ b/models/official/projects/maskformer/losses/.ipynb_checkpoints/maskformer_losses-checkpoint.py @@ -0,0 +1,292 @@ +import tensorflow as tf +from official.vision.losses import focal_loss +from official.projects.detr.ops import matchers +from loguru import logger +tf.compat.v1.enable_eager_execution() + +def _max_by_axis(the_list): + all_max = the_list[0] + for sublist in the_list[1:]: + for idx, item in enumerate(sublist): + all_max[idx] = max(all_max[idx], item) + return all_max + +class NestedTensor(object): + def __init__(self, tensors, mask=None): + self.tensors = tf.convert_to_tensor(tensors) + self.mask = tf.convert_to_tensor(mask) if mask is not None else None + + def to(self, device): + with tf.device(device): + cast_tensor = tf.identity(self.tensors) + cast_mask = tf.identity(self.mask) if self.mask is not None else None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list): + if tf.rank(tensor_list[0]).numpy() == 3: + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + + dtype = 
tensor_list[0].dtype + device = tensor_list[0].device + + with tf.device(device): + tensor = tf.zeros(batch_shape, dtype=dtype) + mask = tf.ones((b, h, w), dtype=tf.bool) + + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img = tf.Variable(pad_img) + pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].assign(img) + pad_img = tf.convert_to_tensor(pad_img) + + m = tf.Variable(m) + false_tensor = tf.zeros((img.shape[1], img.shape[2]), dtype=tf.bool) + m[:img.shape[1], :img.shape[2]].assign(false_tensor) + m = tf.convert_to_tensor(m) + return NestedTensor(tensor, mask) + else: + raise ValueError("not supported") + + +class FocalLossMod(focal_loss.FocalLoss): + """Implements a Focal loss for segmentation problems. + Reference: + [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278). + """ + + def __init__(self, alpha=0.25, gamma=2): + """Initializes `FocalLoss`. + Args: + alpha: The `alpha` weight factor for binary class imbalance. + gamma: The `gamma` focusing parameter to re-weight loss. + reduction and name? + """ + super().__init__(alpha, gamma, reduction='none') + # self.background_indices = background_indices + + def call(self, y_true, y_pred): + """Invokes the `FocalLoss`. + Args: + y_true: A tensor of size [batch, num_anchors, num_classes]. + Stores the binary classification lavel for each element in y_pred. + y_pred: A tensor of size [batch, num_anchors, num_classes]. + The predictions of each example. + num_masks: The number of masks. + + Returns: + Loss float `Tensor`. + """ + # background_indices = tf.expand_dims(self.background_indices, axis=0) + weighted_loss = super().call(y_true, y_pred) + # print("weighted loss :", weighted_loss.shape) #(1, 100, 442368) + # mean over all pixels + loss = tf.math.reduce_mean(weighted_loss, axis=-1) + # logger.debug("loss shape: {}".format(loss.shape)) + # logger.debug("loss: {}".format(loss)) + return loss + + def batch(self, y_true, y_pred): + """ + y_true: (b_size, 100 (num objects), h*w) + y_pred: (b_size, 100 (num objects), h*w) + """ + hw = tf.cast(tf.shape(y_pred)[-1], dtype=tf.float32) #[100, h, w] + prob = tf.keras.activations.sigmoid(y_pred) + focal_pos = tf.pow(1 - prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_pred), logits=y_pred) + focal_neg = tf.pow(prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(y_pred), logits=y_pred) + + if self._alpha >= 0: + focal_pos = focal_pos * self._alpha + focal_neg = focal_neg * (1 - self._alpha) + loss = tf.einsum("bnc,bmc->bnm", focal_pos, y_true) + tf.einsum( + "bnc,bmc->bnm", focal_neg, (1 - y_true) + ) + return loss / hw + + + +class DiceLoss(tf.keras.losses.Loss): + + def __init__(self): + super().__init__(reduction='none') + + def call(self, y_true, y_pred): + """ + y_true: (b size, 100, h*w) + """ + + y_pred = tf.reshape(tf.keras.activations.sigmoid(y_pred), (y_pred.shape[0],y_pred.shape[1],-1)) + y_true = tf.reshape(y_true, (y_true.shape[0],tf.shape(y_true)[1],-1)) + + numerator = 2 * tf.reduce_sum(y_pred * y_true, axis=-1) + denominator = tf.reduce_sum(y_pred, axis=-1) + tf.reduce_sum(y_true, axis=-1) + loss = 1 - (numerator + 1) / (denominator + 1) + + return loss + + def batch(self, y_true, y_pred): + # y_pred = tf.keras.activations.sigmoid(y_pred) + y_pred = tf.sigmoid(y_pred) + y_pred = tf.reshape(y_pred, [y_pred.shape[0], -1, y_pred.shape[1]]) + y_pred = tf.transpose(y_pred, [0, 2, 1]) + + numerator = 2 * tf.einsum("bnc,bmc->bnm", y_pred, y_true) + 
denominator = tf.reduce_sum(y_pred, axis=-1)[:, tf.newaxis] + tf.expand_dims(tf.reduce_sum(y_true, axis=-1), axis=-1) + + + loss = 1 - (numerator + 1) / (denominator + 1) + + return loss + +class Loss: + def __init__(self, num_classes, matcher, eos_coef, cost_class = 1, cost_focal = 1, cost_dice = 1): + + self.num_classes = num_classes + self.matcher = matcher + self.eos_coef = eos_coef + self.cost_class = cost_class + self.cost_focal = cost_focal + self.cost_dice = cost_dice + + + def memory_efficient_matcher(self, outputs, y_true): + batch_size, num_queries = outputs["pred_logits"].shape[:2] + out_mask = outputs["pred_masks"] + out_mask = tf.transpose(out_mask, perm=[0,3,1,2]) + + tgt_ids = tf.cast(y_true["unique_ids"], dtype=tf.int64) + + with tf.device(out_mask.device): + tgt_mask = y_true["individual_masks"] + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) # [b, 100, h, w, 1] + cost_class = tf.gather(-tf.nn.softmax(outputs["pred_logits"]), tgt_ids, batch_dims=1, axis=-1) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) + + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) # [b, 100, h*w] + + cost_focal = FocalLossMod().batch(tgt_mask, out_mask) + cost_dice = DiceLoss().batch(tgt_mask, out_mask) + + total_cost = ( + self.cost_focal * cost_focal + + self.cost_class * cost_class + + self.cost_dice * cost_dice + ) + + _, inds = matchers.hungarian_matching(total_cost) + return inds + + + + def get_loss(self, batch_size, outputs, y_true, indices): + + target_index = tf.math.argmax(indices, axis=1) #[batchsize, 100] + target_labels = y_true["unique_ids"] #[batchsize, num_gt_objects] + cls_outputs = outputs["pred_logits"] # [batchsize, num_queries, num_classes] [1,100,134] + cls_masks = outputs["pred_masks"]# [batchsize, h, w, num_queries] + individual_masks = y_true["individual_masks"] # [batchsize, num_gt_objects, h, w, 1] + + + + cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1) + mask_assigned = tf.gather(cls_masks, target_index, batch_dims=1, axis=1) + + target_classes = tf.cast(target_labels, dtype=tf.int32) + background = tf.equal(target_classes, 0) # Pytorch padds 133 class number where classes are background + + num_masks = tf.reduce_sum(tf.cast(tf.logical_not(background), tf.float32), axis=-1) + ######################################################################################################## + # TODO: check if we need this! 
+ # if Utils.is_dist_avail_and_initialized(): + # num_masks = tf.distribute.get_strategy().reduce(tf.distribute.ReduceOp.SUM, num_masks, axis=None) + # num_masks = tf.maximum(num_masks / tf.distribute.get_strategy().num_replicas_in_sync, 1.0) + ######################################################################################################### + + xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_classes, logits=cls_assigned) + cls_loss = self.cost_class * tf.where(background, 0.1 * xentropy, xentropy) + cls_weights = tf.where(background, 0.1 * tf.ones_like(cls_loss), tf.ones_like(cls_loss)) + + num_masks_per_replica = tf.reduce_sum(num_masks) + cls_weights_per_replica = tf.reduce_sum(cls_weights) + replica_context = tf.distribute.get_replica_context() + num_masks_sum, cls_weights_sum = replica_context.all_reduce(tf.distribute.ReduceOp.SUM,[num_masks_per_replica, cls_weights_per_replica]) + + # Final losses + cls_loss = tf.math.divide_no_nan(tf.reduce_sum(cls_loss), cls_weights_sum) + losses = {'focal_loss' : 0.0, 'dice_loss': 0.0} + + + out_mask = tf.transpose(cls_masks, perm=[0,3,1,2]) + with tf.device(out_mask.device): + tgt_mask = individual_masks + + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) + focal_loss = FocalLossMod()(tgt_mask, out_mask) + dice_loss = DiceLoss()(tgt_mask, out_mask) + + + losses['focal_loss'] = focal_loss + losses['dice_loss'] = dice_loss + background_new = background + + focal_loss_weighted = tf.where(background_new, tf.zeros_like(focal_loss), focal_loss) + dice_loss_weighted = tf.where(background_new, tf.zeros_like(dice_loss), dice_loss) + focal_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(focal_loss_weighted), num_masks_sum) + dice_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(dice_loss_weighted), num_masks_sum) + + + return cls_loss, focal_loss_final, dice_loss_final + + def __call__(self, outputs, y_true): + """ + This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + y_true: list of dicts, such that len(y_true) == batch_size. 
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} + batch_size, num_queries = outputs["pred_logits"].shape[:2] + indices = self.memory_efficient_matcher(outputs_without_aux, y_true) # (batchsize, num_queries, num_queries) + + losses = {} + + cls_loss_final, focal_loss_final, dice_loss_final = self.get_loss(batch_size, outputs, y_true, indices) + + losses.update({"loss_ce": self.cost_class*cls_loss_final, + "loss_focal": self.cost_focal*focal_loss_final, + "loss_dice": self.cost_dice*dice_loss_final}) + + # if "aux_outputs" in outputs and outputs["aux_outputs"] is not None: + # for i, aux_outputs in enumerate(outputs["aux_outputs"]): + # indices = self.memory_efficient_matcher(aux_outputs, y_true) + # # for loss in self.losses: + # cls_loss_, focal_loss_, dice_loss_ = self.get_loss(batch_size, aux_outputs, y_true, indices) + + # l_dict = {"loss_ce" + f"_{i}": self.cost_class * cls_loss_, + # "loss_focal" + f"_{i}": self.cost_focal *focal_loss_, + # "loss_dice" + f"_{i}": self.cost_dice * dice_loss_} + # losses.update(l_dict) + + return losses + \ No newline at end of file diff --git a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-38.pyc b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..13503526 Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc index 60470171..eeec7296 100644 Binary files a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/inference.cpython-38.pyc b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-38.pyc new file mode 100644 index 00000000..71ae6951 Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc new file mode 100644 index 00000000..271cf81c Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc index 188ef731..a30798f0 100644 Binary files a/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/matchers.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/matchers.cpython-39.pyc new file mode 100644 index 00000000..93ebac11 Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/matchers.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/inference.py b/models/official/projects/maskformer/losses/inference.py index 6b326e13..56cb237a 100644 --- 
a/models/official/projects/maskformer/losses/inference.py +++ b/models/official/projects/maskformer/losses/inference.py @@ -1,13 +1,23 @@ import tensorflow as tf class PanopticInference(): - def call(self, mask_true, mask_pred): + def call(self, mask_true, mask_pred, image_shape, num_classes = 133): + + interpolate = tf.keras.layers.Resizing( + image_shape[1], image_shape[2], interpolation = "bilinear") + #permute = tf.keras.layers.Permute((3, 1, 2)) + #mask_pred = permute(mask_pred) + #print(mask_pred.shape) + mask_pred = interpolate(mask_pred) + #permute = tf.keras.layers.Permute((2, 3, 1)) + #mask_pred = permute(mask_pred) + probs = tf.keras.activations.softmax(mask_true, axis=-1) scores = tf.reduce_max(probs, axis=-1) labels = tf.argmax(probs, axis=-1) mask_pred = tf.keras.activations.sigmoid(mask_pred) - config_num_classes = 171 + config_num_classes = num_classes object_mask_threshold = 0.0 keep = tf.math.logical_and(tf.math.not_equal(labels, config_num_classes), scores > object_mask_threshold) curr_scores = scores[keep] @@ -65,3 +75,4 @@ def call(self, mask_true, mask_pred): return panoptic_seg, segments_info + diff --git a/models/official/projects/maskformer/losses/inference_test.py b/models/official/projects/maskformer/losses/inference_test.py old mode 100644 new mode 100755 index afa1e412..7bcc9b22 --- a/models/official/projects/maskformer/losses/inference_test.py +++ b/models/official/projects/maskformer/losses/inference_test.py @@ -1,17 +1,20 @@ from absl.testing import parameterized import tensorflow as tf - +import sys +sys.path.append("/home/isaacjaeminin/inference/tf-maskformer/models") from official.projects.maskformer.losses.inference import PanopticInference -from official.projects.maskformer.maskformer import MaskFormer +from official.projects.maskformer.modeling.maskformer import MaskFormer class PanopticInferenceTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('test1',)) def test_pass_through(self): - model = MaskFormer() + input_specs = tf.keras.layers.InputSpec(shape=[None] + + [640, 640, 3]) + model = MaskFormer(input_specs = input_specs) input_image = tf.ones((1, 640, 640, 3)) - expected_class_probs_shape = [1, 100, 172] + expected_class_probs_shape = [1, 100, 134] expected_mask_probs_shape = [1, 160, 160, 100] output = model(input_image) @@ -19,8 +22,9 @@ def test_pass_through(self): output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) self.assertAllEqual( output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) - - out = PanopticInference().call(mask_true=output["class_prob_predictions"], mask_pred=output["mask_prob_predictions"]) + print(input_image.shape[1]) + print(input_image.shape[2]) + out = PanopticInference().call(mask_true=output["class_prob_predictions"], mask_pred=output["mask_prob_predictions"],image_shape = input_image.shape, num_classes=133) print(out) diff --git a/models/official/projects/maskformer/losses/maskformer_losses.py b/models/official/projects/maskformer/losses/maskformer_losses.py index 92a31a24..4bfe5edd 100644 --- a/models/official/projects/maskformer/losses/maskformer_losses.py +++ b/models/official/projects/maskformer/losses/maskformer_losses.py @@ -1,8 +1,63 @@ import tensorflow as tf - from official.vision.losses import focal_loss +from official.projects.detr.ops import matchers +from loguru import logger +tf.compat.v1.enable_eager_execution() + +def _max_by_axis(the_list): + all_max = the_list[0] + for sublist in the_list[1:]: + for idx, item in 
enumerate(sublist): + all_max[idx] = max(all_max[idx], item) + return all_max + +class NestedTensor(object): + def __init__(self, tensors, mask=None): + self.tensors = tf.convert_to_tensor(tensors) + self.mask = tf.convert_to_tensor(mask) if mask is not None else None + + def to(self, device): + with tf.device(device): + cast_tensor = tf.identity(self.tensors) + cast_mask = tf.identity(self.mask) if self.mask is not None else None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list): + if tf.rank(tensor_list[0]).numpy() == 3: + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + + dtype = tensor_list[0].dtype + device = tensor_list[0].device -class FocalLoss(focal_loss.FocalLoss): + with tf.device(device): + tensor = tf.zeros(batch_shape, dtype=dtype) + mask = tf.ones((b, h, w), dtype=tf.bool) + + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img = tf.Variable(pad_img) + pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].assign(img) + pad_img = tf.convert_to_tensor(pad_img) + + m = tf.Variable(m) + false_tensor = tf.zeros((img.shape[1], img.shape[2]), dtype=tf.bool) + m[:img.shape[1], :img.shape[2]].assign(false_tensor) + m = tf.convert_to_tensor(m) + return NestedTensor(tensor, mask) + else: + raise ValueError("not supported") + + +class FocalLossMod(focal_loss.FocalLoss): """Implements a Focal loss for segmentation problems. Reference: [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278). @@ -15,9 +70,10 @@ def __init__(self, alpha=0.25, gamma=2): gamma: The `gamma` focusing parameter to re-weight loss. reduction and name? """ - super().__init__(alpha, gamma) + super().__init__(alpha, gamma, reduction='none') + # self.background_indices = background_indices - def call(self, y_true, y_pred, num_masks): + def call(self, y_true, y_pred): """Invokes the `FocalLoss`. Args: y_true: A tensor of size [batch, num_anchors, num_classes]. @@ -29,178 +85,208 @@ def call(self, y_true, y_pred, num_masks): Returns: Loss float `Tensor`. 
""" + # background_indices = tf.expand_dims(self.background_indices, axis=0) weighted_loss = super().call(y_true, y_pred) - loss = tf.math.reduce_sum(tf.math.reduce_mean(weighted_loss,axis=1)) / num_masks + # print("weighted loss :", weighted_loss.shape) #(1, 100, 442368) + # mean over all pixels + loss = tf.math.reduce_mean(weighted_loss, axis=-1) + # logger.debug("loss shape: {}".format(loss.shape)) + # logger.debug("loss: {}".format(loss)) return loss + def batch(self, y_true, y_pred): + """ + y_true: (b_size, 100 (num objects), h*w) + y_pred: (b_size, 100 (num objects), h*w) + """ + hw = tf.cast(tf.shape(y_pred)[-1], dtype=tf.float32) #[100, h, w] + prob = tf.keras.activations.sigmoid(y_pred) + focal_pos = tf.pow(1 - prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_pred), logits=y_pred) + focal_neg = tf.pow(prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(y_pred), logits=y_pred) + + if self._alpha >= 0: + focal_pos = focal_pos * self._alpha + focal_neg = focal_neg * (1 - self._alpha) + loss = tf.einsum("bnc,bmc->bnm", focal_pos, y_true) + tf.einsum( + "bnc,bmc->bnm", focal_neg, (1 - y_true) + ) + return loss / hw + + + class DiceLoss(tf.keras.losses.Loss): - # TODO: figure out dice loss stuff - def call(self, y_true, y_pred, num_masks): - y_pred = tf.keras.activations.sigmoid(y_pred).reshape(-1) - y_true = tf.keras.activations.flatten(y_true) - numerator = 2 * tf.reduce_sum(y_pred * y_true, axis=1) - denominator = tf.reduce_sum(y_pred, axis=1) + tf.reduce_sum(y_true, axis=1) + + def __init__(self): + super().__init__(reduction='none') + + def call(self, y_true, y_pred): + """ + y_true: (b size, 100, h*w) + """ + + y_pred = tf.reshape(tf.keras.activations.sigmoid(y_pred), (y_pred.shape[0],y_pred.shape[1],-1)) + y_true = tf.reshape(y_true, (y_true.shape[0],tf.shape(y_true)[1],-1)) + + numerator = 2 * tf.reduce_sum(y_pred * y_true, axis=-1) + denominator = tf.reduce_sum(y_pred, axis=-1) + tf.reduce_sum(y_true, axis=-1) loss = 1 - (numerator + 1) / (denominator + 1) - return tf.reduce_sum(loss) / num_masks + + return loss + + def batch(self, y_true, y_pred): + # y_pred = tf.keras.activations.sigmoid(y_pred) + y_pred = tf.sigmoid(y_pred) + y_pred = tf.reshape(y_pred, [y_pred.shape[0], -1, y_pred.shape[1]]) + y_pred = tf.transpose(y_pred, [0, 2, 1]) + + numerator = 2 * tf.einsum("bnc,bmc->bnm", y_pred, y_true) + denominator = tf.reduce_sum(y_pred, axis=-1)[:, tf.newaxis] + tf.expand_dims(tf.reduce_sum(y_true, axis=-1), axis=-1) + -class Loss(): - def __init__(self, num_classes, similarity_calc, matcher, weight_dict, eos_coef, losses): + loss = 1 - (numerator + 1) / (denominator + 1) + + return loss + +class Loss: + def __init__(self, num_classes, matcher, eos_coef, cost_class = 1, cost_focal = 1, cost_dice = 1): + self.num_classes = num_classes - self.similarity_calc = similarity_calc self.matcher = matcher - self.weight_dict = weight_dict self.eos_coef = eos_coef - self.losses = losses - empty_weight = tf.ones(self.num_classes + 1) - empty_weight = tf.tensor_scatter_nd_update(empty_weight, [[self.num_classes]], [self.eos_coef]) - self.empty_weight = tf.Variable(empty_weight, trainable=False, name='empty_weight') - - def _get_pred_permutation_idx(self, indices): - batch_idx = tf.concat([tf.fill(pred,i) for i, (pred,_) in enumerate(indices)], axis=0) - pred_idx = tf.concat([pred for (pred,) in indices], axis=0) - return batch_idx, pred_idx - - def _get_true_permutation_idx(self, indices): - batch_idx = 
tf.concat([tf.fill(true,i) for i, (_,true) in enumerate(indices)], axis=0) - true_idx = tf.concat([true for (_,true) in indices], axis=0) - return batch_idx, true_idx - - def get_loss(self, loss, outputs, y_true, indices, num_masks): - loss_map = {"labels": ClassificationLoss().call, "masks": MaskLoss().call} - assert loss in loss_map - return loss_map[loss](outputs, y_true, indices, num_masks) + self.cost_class = cost_class + self.cost_focal = cost_focal + self.cost_dice = cost_dice - def call(self, outputs, y_true): - """This performs the loss computation. - Parameters: - outputs: dict of tensors, see the output specification of the model for the format - y_true: list of dicts, such that len(targets) == batch_size. - The expected keys in each dict depends on the losses applied, see each loss' doc - """ - outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} - # TODO: check matcher doc - match_quality_matrix = self.similarity_calc.compare(outputs_without_aux, y_true) - # indices = self.matcher._match(outputs_without_aux, y_true) - groundtruth_weights = tf.ones(match_quality_matrix.shape[0], dtype=tf.float32) - self.matcher._match(match_quality_matrix, y_true) - # - - num_masks = sum(len(t["labels"]) for t in y_true) - num_masks = tf.convert_to_tensor([num_masks], dtype=tf.float64) # device? + + def memory_efficient_matcher(self, outputs, y_true): + batch_size, num_queries = outputs["pred_logits"].shape[:2] + out_mask = outputs["pred_masks"] + out_mask = tf.transpose(out_mask, perm=[0,3,1,2]) - if Utils.is_dist_avail_and_initialized(): - num_masks = tf.distribute.get_strategy().reduce(tf.distribute.ReduceOp.SUM, num_masks, axis=None) - num_masks = tf.maximum(num_masks / tf.distribute.get_strategy().num_replicas_in_sync, 1.0) - - losses = {} - for loss in self.losses: - losses.update(self.get_loss(loss, outputs, y_true, indices, num_masks)) + tgt_ids = tf.cast(y_true["unique_ids"], dtype=tf.int64) - if "aux_outputs" in outputs: - for i, aux_outputs in enumerate(outputs["aux_outputs"]): - indices = self.matcher(aux_outputs, y_true) - for loss in self.losses: - l_dict = self.get_loss(loss, aux_outputs, y_true, indices, num_masks) - l_dict = {k + f"_{i}": v for k, v in l_dict.items()} - losses.update(l_dict) + with tf.device(out_mask.device): + tgt_mask = y_true["individual_masks"] + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) # [b, 100, h, w, 1] + cost_class = tf.gather(-tf.nn.softmax(outputs["pred_logits"]), tgt_ids, batch_dims=1, axis=-1) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) - return losses - + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) # [b, 100, h*w] + + cost_focal = FocalLossMod().batch(tgt_mask, out_mask) + cost_dice = DiceLoss().batch(tgt_mask, out_mask) + + total_cost = ( + self.cost_focal * cost_focal + + self.cost_class * cost_class + + self.cost_dice * cost_dice + ) + + _, inds = matchers.hungarian_matching(total_cost) + return inds -class ClassificationLoss(Loss): - def call(self, outputs, y_true, indices, num_masks): - assert "pred_logits" in outputs + + + def get_loss(self, batch_size, outputs, y_true, indices): + + target_index = tf.math.argmax(indices, axis=1) #[batchsize, 100] + target_labels = 
y_true["unique_ids"] #[batchsize, num_gt_objects] + cls_outputs = outputs["pred_logits"] # [batchsize, num_queries, num_classes] [1,100,134] + cls_masks = outputs["pred_masks"]# [batchsize, h, w, num_queries] + individual_masks = y_true["individual_masks"] # [batchsize, num_gt_objects, h, w, 1] - pred_logits = outputs["pred_logits"] + - idx = super()._get_pred_permutation_idx(indices) - true_classes_o = tf.concat([t["labels"][J] for t, (_, J) in zip(y_true, indices)], axis=0) + cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1) + mask_assigned = tf.gather(cls_masks, target_index, batch_dims=1, axis=1) - with tf.device(pred_logits.device): - true_classes = tf.cast(tf.fill(pred_logits.shape[:2], super().num_classes), dtype=tf.int64) # device? - true_classes = tf.tensor_scatter_nd_update(true_classes, tf.expand_dims(idx, axis=1), true_classes_o) + target_classes = tf.cast(target_labels, dtype=tf.int32) + background = tf.equal(target_classes, 0) # Pytorch padds 133 class number where classes are background + + num_masks = tf.reduce_sum(tf.cast(tf.logical_not(background), tf.float32), axis=-1) + ######################################################################################################## + # TODO: check if we need this! + # if Utils.is_dist_avail_and_initialized(): + # num_masks = tf.distribute.get_strategy().reduce(tf.distribute.ReduceOp.SUM, num_masks, axis=None) + # num_masks = tf.maximum(num_masks / tf.distribute.get_strategy().num_replicas_in_sync, 1.0) + ######################################################################################################### + + xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_classes, logits=cls_assigned) + cls_loss = self.cost_class * tf.where(background, 0.1 * xentropy, xentropy) + cls_weights = tf.where(background, 0.1 * tf.ones_like(cls_loss), tf.ones_like(cls_loss)) + + num_masks_per_replica = tf.reduce_sum(num_masks) + cls_weights_per_replica = tf.reduce_sum(cls_weights) + replica_context = tf.distribute.get_replica_context() + num_masks_sum, cls_weights_sum = replica_context.all_reduce(tf.distribute.ReduceOp.SUM,[num_masks_per_replica, cls_weights_per_replica]) + + # Final losses + cls_loss = tf.math.divide_no_nan(tf.reduce_sum(cls_loss), cls_weights_sum) + losses = {'focal_loss' : 0.0, 'dice_loss': 0.0} + + + out_mask = tf.transpose(cls_masks, perm=[0,3,1,2]) + with tf.device(out_mask.device): + tgt_mask = individual_masks - # loss_ce = tf.nn.softmax_cross_entropy_with_logits(y_true, tf.transpose(pred_logits,(1,2))) - # loss_ce = tf.nn.weighted_cross_entropy_with_logits(y_true, tf.transpose(pred_logits,(1,2)), super().empty_weight) - loss_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_classes, logits=tf.transpose(pred_logits, [0, 2, 1])) - weighted_loss_ce = tf.reduce_mean(tf.multiply(loss_ce, super().empty_weight)) - losses = {"loss_ce": weighted_loss_ce} - return losses + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) + focal_loss = FocalLossMod()(tgt_mask, out_mask) + dice_loss = DiceLoss()(tgt_mask, out_mask) + + + losses['focal_loss'] = focal_loss 
+ losses['dice_loss'] = dice_loss + background_new = background -class MaskLoss(Loss): - def call(self, outputs, y_true, indices, num_masks): - assert "pred_masks" in outputs - - pred_idx = super()._get_pred_permutation_idx(indices) - true_idx = super()._get_true_permutation_idx(indices) - pred_masks = outputs["pred_masks"] - pred_masks = pred_masks[pred_idx] - masks = [t["masks"] for t in y_true] - - true_masks, valid = Utils.nested_tensor_from_tensor_list(masks).decompose() - # true_masks = tf.cast(true_masks, pred_masks.dtype) # device? - true_masks = true_masks.to(pred_masks) - true_masks = true_masks[true_idx] - - pred_masks = tf.image.resize(pred_masks[..., tf.newaxis], true_masks.shape[-2:], method='bilinear', align_corners=False)[..., 0] - pred_masks = tf.reshape(pred_masks[:, 0], -1) - - true_masks = tf.reshape(true_masks, -1) - true_masks = tf.reshape(true_masks, pred_masks.shape) - losses = { - "loss_mask": FocalLoss().call(pred_masks, true_masks, num_masks), - "loss_dice": DiceLoss().call(pred_masks, true_masks, num_masks) - } - return losses + focal_loss_weighted = tf.where(background_new, tf.zeros_like(focal_loss), focal_loss) + dice_loss_weighted = tf.where(background_new, tf.zeros_like(dice_loss), dice_loss) + focal_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(focal_loss_weighted), num_masks_sum) + dice_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(dice_loss_weighted), num_masks_sum) -class Utils(): - def _max_by_axis(the_list): - all_max = the_list[0] - for sublist in the_list[1:]: - for idx, item in enumerate(sublist): - all_max[idx] = max(all_max[idx], item) - return all_max - - class NestedTensor(object): - def __init__(self, tensors, mask=None): - self.tensors = tf.convert_to_tensor(tensors) - self.mask = tf.convert_to_tensor(mask) if mask is not None else None - - def to(self, device): - # type: (Device) -> NestedTensor # noqa - with tf.device(device): - cast_tensor = tf.identity(self.tensors) - cast_mask = tf.identity(self.mask) if self.mask is not None else None - return NestedTensor(cast_tensor, cast_mask) - - def decompose(self): - return self.tensors, self.mask - - def __repr__(self): - return str(self.tensors) - - def nested_tensor_from_tensor_list(tensor_list): - if tf.rank(tensor_list[0]).numpy() == 3: - # TODO: figure out ONNX stuff - # if tf.executing_eagerly(): - # return _onnx_nested_tensor_from_tensor_list(tensor_list) - - max_size = tf.reduce_max([tf.shape(img) for img in tensor_list], axis=0) - batch_shape = tf.concat([[len(tensor_list)], max_size], axis=0) - batch_size, num_channels, height, width = batch_shape - with tf.device(tensor_list[0].device): - tensor = tf.zeros(batch_shape, dtype=tensor_list[0].dtype) - mask = tf.ones((batch_size, height, width), dtype=tf.bool_) - for img, pad_img, m in zip(tensor_list, tensor, mask): - pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].assign(img) - m[:img.shape[1], :img.shape[2]].assign(False) - else: - raise ValueError("not supported") - return NestedTensor(tensor, mask) + + return cls_loss, focal_loss_final, dice_loss_final - def is_dist_avail_and_initialized(): - if not tf.distribute.has_strategy(): - return False - if not tf.distribute.in_cross_replica_context(): - return False - return True \ No newline at end of file + def __call__(self, outputs, y_true): + """ + This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + y_true: list of dicts, such that len(y_true) == batch_size. 
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} + batch_size, num_queries = outputs["pred_logits"].shape[:2] + indices = self.memory_efficient_matcher(outputs_without_aux, y_true) # (batchsize, num_queries, num_queries) + + losses = {} + + cls_loss_final, focal_loss_final, dice_loss_final = self.get_loss(batch_size, outputs, y_true, indices) + + losses.update({"loss_ce": self.cost_class*cls_loss_final, + "loss_focal": self.cost_focal*focal_loss_final, + "loss_dice": self.cost_dice*dice_loss_final}) + + # if "aux_outputs" in outputs and outputs["aux_outputs"] is not None: + # for i, aux_outputs in enumerate(outputs["aux_outputs"]): + # indices = self.memory_efficient_matcher(aux_outputs, y_true) + # # for loss in self.losses: + # cls_loss_, focal_loss_, dice_loss_ = self.get_loss(batch_size, aux_outputs, y_true, indices) + + # l_dict = {"loss_ce" + f"_{i}": self.cost_class * cls_loss_, + # "loss_focal" + f"_{i}": self.cost_focal *focal_loss_, + # "loss_dice" + f"_{i}": self.cost_dice * dice_loss_} + # losses.update(l_dict) + + return losses + \ No newline at end of file diff --git a/models/official/projects/maskformer/losses/maskformer_losses_test.py b/models/official/projects/maskformer/losses/maskformer_losses_test.py index ce72dcd9..1bbfb391 100644 --- a/models/official/projects/maskformer/losses/maskformer_losses_test.py +++ b/models/official/projects/maskformer/losses/maskformer_losses_test.py @@ -1,36 +1,73 @@ from official.projects.maskformer.losses.maskformer_losses import Loss -from research.object_detection.matchers.hungarian_matcher import HungarianBipartiteMatcher -from research.object_detection.core.region_similarity_calculator import DETRSimilarity +from official.projects.detr.ops.matchers import hungarian_matching from absl.testing import parameterized import tensorflow as tf -import torch + +import numpy as np import pickle class LossTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('test1',)) def test_pass_through(self): - similarity_calc = DETRSimilarity() - matcher = HungarianBipartiteMatcher() - mask_weight = 20.0 - dice_weight = 1.0 + matcher = hungarian_matching no_object_weight = 0.1 - weight_dict = {"loss_ce":1, "loss_mask": mask_weight, "loss_dice": dice_weight} + losses = ["labels", "masks"] - + self.weight_dict = { + "ce_loss" : 1.0, + "focal_loss" : 20.0, + "dice_loss" : 1.0, + } loss = Loss( - num_classes = 171, - similarity_calc = similarity_calc, + num_classes = 133, matcher = matcher, - weight_dict = weight_dict, eos_coef = no_object_weight, - losses = losses + cost_class= 1.0, + cost_dice= 1.0, + cost_focal=20.0 ) - with open("losses_test.pkl", "rb") as f: - params = pickle.load(f) - - print(loss.call(params["outputs"], params["targets"])) + # outputs = {"pred_logits":tf.convert_to_tensor(np.load("output_pred_logits.npy")), "pred_masks":tf.convert_to_tensor(np.load("output_pred_masks.npy"))} + # print(f"outputs['pred_logits'] shape is {outputs['pred_logits'].shape}") + # print(f"outputs['pred_masks'] shape is {outputs['pred_masks'].shape}") + + main_pth = "/depot/qqiu/data/vishal/projects/tf-maskformer/models/official/projects/maskformer/losses" + aux_out_0 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits0.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks0.npy"))} + aux_out_1 = {"pred_logits" : 
tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits1.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks1.npy"))} + aux_out_2 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits2.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks2.npy"))} + aux_out_3 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits3.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks3.npy"))} + aux_out_4 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits4.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks4.npy"))} + aux_outputs = [aux_out_0, aux_out_1, aux_out_2, aux_out_3, aux_out_4] + pred_logits_load = tf.convert_to_tensor(np.load(main_pth+"/tensors/output_pred_logits.npy")) + pred_masks_load = tf.convert_to_tensor(np.load(main_pth+"/tensors/output_pred_masks.npy")) + outputs = { + "pred_logits": pred_logits_load, + "pred_masks": pred_masks_load, + "aux_outputs": aux_outputs + } + + # Load the new_targets_dict NumPy array + targets = [] + # TODO : Caution the below loop is for each image in the batch + for i in range(2): # Here 2 is for batch size + targets.append( + { + "labels": tf.convert_to_tensor(np.load(main_pth+'/tensors/targets_labels_'+str(i)+'.npy')), + "masks": tf.convert_to_tensor(np.load(main_pth+'/tensors/targets_masks_'+str(i)+'.npy')), + } + ) + + + losses = loss(outputs, targets) + + + print("Losses are : ", losses) + print("Total Loss is :", losses['loss_ce'] + losses['loss_dice'] + losses['loss_focal']) + # for i in range(4): + # print(f"Total aux Loss {i} : losses['loss_ce_'+{str(i)}] + losses['loss_dice_'+{str(i)}] + losses['loss_focal_'+{str(i)}]") + # TODO: Check if this is correct + # self.assertAllEqual(losses, ) if __name__ == '__main__': tf.test.main() \ No newline at end of file diff --git a/models/official/projects/maskformer/losses/matchers.py b/models/official/projects/maskformer/losses/matchers.py new file mode 100644 index 00000000..6d57117f --- /dev/null +++ b/models/official/projects/maskformer/losses/matchers.py @@ -0,0 +1,492 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tensorflow implementation to solve the Linear Sum Assignment problem. + +The Linear Sum Assignment problem involves determining the minimum weight +matching for bipartite graphs. For example, this problem can be defined by +a 2D matrix C, where each element i,j determines the cost of matching worker i +with job j. The solution to the problem is a complete assignment of jobs to +workers, such that no job is assigned to more than one work and no worker is +assigned more than one job, with minimum cost. + +This implementation builds off of the Hungarian +Matching Algorithm (https://www.cse.ust.hk/~golin/COMP572/Notes/Matching.pdf). 
+ +Based on the original implementation by Jiquan Ngiam. +""" +import tensorflow as tf +from official.modeling import tf_utils +def _prepare(weights): + """Prepare the cost matrix. + + To improve the computational efficiency of the algorithm, all weights are shifted + to be non-negative. Each element is reduced by the row / column minimum. Note + that neither operation will affect the resulting solution but will provide + a better starting point for the greedy assignment. Note this corresponds to + the pre-processing and step 1 of the Hungarian algorithm from Wikipedia. + + Args: + weights: A float32 [batch_size, num_elems, num_elems] tensor, where each + inner matrix represents weights to be used for matching. + + Returns: + A prepared weights tensor of the same shape and dtype. + """ + # Since every worker needs a job and every job needs a worker, we can subtract + # the minimum from each. + weights -= tf.reduce_min(weights, axis=2, keepdims=True) + weights -= tf.reduce_min(weights, axis=1, keepdims=True) + return weights + + +def _greedy_assignment(adj_matrix): + """Greedily assigns workers to jobs based on an adjacency matrix. + + Starting with an adjacency matrix representing the available connections + in the bi-partite graph, this function greedily chooses elements such + that each worker is matched to at most one job (or each job is assigned to + at most one worker). Note, if the adjacency matrix has no available values + for a particular row/column, the corresponding job/worker may go unassigned. + + Args: + adj_matrix: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker (row) can be + matched to the job (column). + + Returns: + A bool [batch_size, num_elems, num_elems] tensor, where each element of the + inner matrix represents whether the worker has been matched to the job. + Each row and column can have at most one true element. Some of the rows + and columns may not be matched. + """ + + _, num_elems, _ = tf_utils.get_shape_list(adj_matrix, expected_rank=3) + adj_matrix = tf.transpose(adj_matrix, [1, 0, 2]) + + # Create a dynamic TensorArray containing the assignments for each worker/job + assignment = tf.TensorArray(tf.bool, num_elems) + + # Store the elements assigned to each column to update each iteration + col_assigned = tf.zeros_like(adj_matrix[0, ...], dtype=tf.bool) + + # Iteratively assign each row using tf.foldl. Intuitively, this is a loop + # over rows, where we incrementally assign each row. + def _assign_row(accumulator, row_adj): + # The accumulator tracks the row assignment index. + idx, assignment, col_assigned = accumulator + + # Viable candidates cannot already be assigned to another job. + candidates = row_adj & (~col_assigned) + + # Deterministically assign to the candidates of the highest index count. + max_candidate_idx = tf.argmax( + tf.cast(candidates, tf.int32), axis=1, output_type=tf.int32) + + candidates_indicator = tf.one_hot( + max_candidate_idx, + num_elems, + on_value=True, + off_value=False, + dtype=tf.bool) + candidates_indicator &= candidates + # Make assignment to the column.
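# Reference sketch (illustrative, not part of the patch): the greedy seeding performed by
# _greedy_assignment / _assign_row above, written as a plain-Python loop over a single,
# made-up adjacency matrix. Each worker row claims the lowest-index still-unclaimed job it
# is adjacent to; rows with no remaining candidates stay unmatched.
import numpy as np

adj = np.array([[1, 0, 1],
                [1, 1, 0],
                [0, 1, 0]], dtype=bool)
assignment = np.zeros_like(adj)
col_taken = np.zeros(adj.shape[1], dtype=bool)
for i, row in enumerate(adj):
    candidates = row & ~col_taken
    if candidates.any():
        j = int(np.argmax(candidates))   # first True, i.e. the lowest-index free column
        assignment[i, j] = True
        col_taken[j] = True
# Result: worker 0 -> job 0, worker 1 -> job 1, worker 2 is left unmatched.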
+ col_assigned |= candidates_indicator + assignment = assignment.write(idx, candidates_indicator) + + return (idx + 1, assignment, col_assigned) + + _, assignment, _ = tf.foldl( + _assign_row, adj_matrix, (0, assignment, col_assigned), back_prop=False) + + assignment = assignment.stack() + assignment = tf.transpose(assignment, [1, 0, 2]) + return assignment + + +def _find_augmenting_path(assignment, adj_matrix): + """Finds an augmenting path given an assignment and an adjacency matrix. + + The augmenting path search starts from the unassigned workers, then goes on + to find jobs (via an unassigned pairing), then back again to workers (via an + existing pairing), and so on. The path alternates between unassigned and + existing pairings. Returns the state after the search. + + Note: In the state the worker and job, indices are 1-indexed so that we can + use 0 to represent unreachable nodes. State contains the following keys: + + - jobs: A [batch_size, 1, num_elems] tensor containing the highest index + unassigned worker that can reach this job through a path. + - jobs_from_worker: A [batch_size, num_elems] tensor containing the worker + reached immediately before this job. + - workers: A [batch_size, num_elems, 1] tensor containing the highest index + unassigned worker that can reach this worker through a path. + - workers_from_job: A [batch_size, num_elems] tensor containing the job + reached immediately before this worker. + - new_jobs: A bool [batch_size, num_elems] tensor containing True if the + unassigned job can be reached via a path. + + State can be used to recover the path via backtracking. + + Args: + assignment: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker has been matched + to the job. This may be a partial assignment. + adj_matrix: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker (row) can be + matched to the job (column). + + Returns: + A state dict, which represents the outcome of running an augmenting + path search on the graph given the assignment. + """ + batch_size, num_elems, _ = tf_utils.get_shape_list( + assignment, expected_rank=3) + unassigned_workers = ~tf.reduce_any(assignment, axis=2, keepdims=True) + unassigned_jobs = ~tf.reduce_any(assignment, axis=1, keepdims=True) + + unassigned_pairings = tf.cast(adj_matrix & ~assignment, tf.int32) + existing_pairings = tf.cast(assignment, tf.int32) + + # Initialize unassigned workers to have non-zero ids, assigned workers will + # have ids = 0. + worker_indices = tf.range(1, num_elems + 1, dtype=tf.int32) + init_workers = tf.tile(worker_indices[tf.newaxis, :, tf.newaxis], + [batch_size, 1, 1]) + init_workers *= tf.cast(unassigned_workers, tf.int32) + + state = { + "jobs": tf.zeros((batch_size, 1, num_elems), dtype=tf.int32), + "jobs_from_worker": tf.zeros((batch_size, num_elems), dtype=tf.int32), + "workers": init_workers, + "workers_from_job": tf.zeros((batch_size, num_elems), dtype=tf.int32) + } + + def _has_active_workers(state, curr_workers): + """Check if there are still active workers.""" + del state + return tf.reduce_sum(curr_workers) > 0 + + def _augment_step(state, curr_workers): + """Performs one search step.""" + + # Note: These steps could be potentially much faster if sparse matrices are + # supported. The unassigned_pairings and existing_pairings matrices can be + # very sparse. + + # Find potential jobs using current workers. 
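# Reference sketch (illustrative, not part of the patch): the reduction performed by
# _prepare above, worked through on the 3x3 cost matrix that matchers_test.py below also
# uses. After both subtractions every row and every column contains a zero; those zeros
# become the adjacency matrix that the greedy seeding and augmenting-path search act on.
import tensorflow as tf

weights = tf.constant([[[4., 1., 3.],
                        [2., 0., 5.],
                        [3., 2., 2.]]])
weights -= tf.reduce_min(weights, axis=2, keepdims=True)  # row minima:    [[3,0,2],[2,0,5],[1,0,0]]
weights -= tf.reduce_min(weights, axis=1, keepdims=True)  # column minima: [[2,0,2],[1,0,5],[0,0,0]]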
+ potential_jobs = curr_workers * unassigned_pairings + curr_jobs = tf.reduce_max(potential_jobs, axis=1, keepdims=True) + curr_jobs_from_worker = 1 + tf.argmax( + potential_jobs, axis=1, output_type=tf.int32) + + # Remove already accessible jobs from curr_jobs. + default_jobs = tf.zeros_like(state["jobs"], dtype=state["jobs"].dtype) + curr_jobs = tf.where(state["jobs"] > 0, default_jobs, curr_jobs) + curr_jobs_from_worker *= tf.cast(curr_jobs > 0, tf.int32)[:, 0, :] + + # Find potential workers from current jobs. + potential_workers = curr_jobs * existing_pairings + curr_workers = tf.reduce_max(potential_workers, axis=2, keepdims=True) + curr_workers_from_job = 1 + tf.argmax( + potential_workers, axis=2, output_type=tf.int32) + + # Remove already accessible workers from curr_workers. + default_workers = tf.zeros_like(state["workers"]) + curr_workers = tf.where( + state["workers"] > 0, default_workers, curr_workers) + curr_workers_from_job *= tf.cast(curr_workers > 0, tf.int32)[:, :, 0] + + # Update state so that we can backtrack later. + state = state.copy() + state["jobs"] = tf.maximum(state["jobs"], curr_jobs) + state["jobs_from_worker"] = tf.maximum(state["jobs_from_worker"], + curr_jobs_from_worker) + state["workers"] = tf.maximum(state["workers"], curr_workers) + state["workers_from_job"] = tf.maximum(state["workers_from_job"], + curr_workers_from_job) + + return state, curr_workers + + state, _ = tf.while_loop( + _has_active_workers, + _augment_step, (state, init_workers), + back_prop=False) + + # Compute new jobs, this is useful for determnining termnination of the + # maximum bi-partite matching and initialization for backtracking. + new_jobs = (state["jobs"] > 0) & unassigned_jobs + state["new_jobs"] = new_jobs[:, 0, :] + return state + + +def _improve_assignment(assignment, state): + """Improves an assignment by backtracking the augmented path using state. + + Args: + assignment: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker has been matched + to the job. This may be a partial assignment. + state: A dict, which represents the outcome of running an augmenting path + search on the graph given the assignment. + + Returns: + A new assignment matrix of the same shape and type as assignment, where the + assignment has been updated using the augmented path found. + """ + batch_size, num_elems, _ = tf_utils.get_shape_list(assignment, 3) + + # We store the current job id and iteratively backtrack using jobs_from_worker + # and workers_from_job until we reach an unassigned worker. We flip all the + # assignments on this path to discover a better overall assignment. + + # Note: The indices in state are 1-indexed, where 0 represents that the + # worker / job cannot be reached. + + # Obtain initial job indices based on new_jobs. + curr_job_idx = tf.argmax( + tf.cast(state["new_jobs"], tf.int32), axis=1, output_type=tf.int32) + + # Track whether an example is actively being backtracked. Since we are + # operating on a batch, not all examples in the batch may be active. + active = tf.gather(state["new_jobs"], curr_job_idx, batch_dims=1) + batch_range = tf.range(0, batch_size, dtype=tf.int32) + + # Flip matrix tracks which assignments we need to flip - corresponding to the + # augmenting path taken. We use an integer tensor here so that we can use + # tensor_scatter_nd_add to update the tensor, and then cast it back to bool + # after the loop. 
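# Reference sketch (illustrative, not part of the patch): the XOR update that
# _improve_assignment applies once an augmenting path has been found. Toggling the path
# edges removes the existing pairing on the path and adds the two new ones, growing the
# matching by one. The 2x2 instance is made up for illustration.
import tensorflow as tf

# Worker 0 is adjacent to jobs 0 and 1, worker 1 only to job 0; the partial matching
# pairs worker 0 with job 0 and leaves worker 1 unmatched.
assignment = tf.constant([[[True,  False],
                           [False, False]]])
# Augmenting path: worker 1 -> job 0 (new edge) -> worker 0 (existing edge) -> job 1 (new edge).
flip_matrix = tf.constant([[[True, True],
                            [True, False]]])
improved = tf.math.logical_xor(assignment, flip_matrix)
# improved pairs worker 0 with job 1 and worker 1 with job 0: a perfect matching.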
+ flip_matrix = tf.zeros((batch_size, num_elems, num_elems), dtype=tf.int32) + + def _has_active_backtracks(flip_matrix, active, curr_job_idx): + """Check if there are still active workers.""" + del flip_matrix, curr_job_idx + return tf.reduce_any(active) + + def _backtrack_one_step(flip_matrix, active, curr_job_idx): + """Take one step in backtracking.""" + # Discover the worker that the job originated from, note that this worker + # must exist by construction. + curr_worker_idx = tf.gather( + state["jobs_from_worker"], curr_job_idx, batch_dims=1) - 1 + curr_worker_idx = tf.maximum(curr_worker_idx, 0) + update_indices = tf.stack([batch_range, curr_worker_idx, curr_job_idx], + axis=1) + update_indices = tf.maximum(update_indices, 0) + flip_matrix = tf.tensor_scatter_nd_add(flip_matrix, update_indices, + tf.cast(active, tf.int32)) + + # Discover the (potential) job that the worker originated from. + curr_job_idx = tf.gather( + state["workers_from_job"], curr_worker_idx, batch_dims=1) - 1 + # Note that jobs may not be active, and we track that here (before + # adjusting indices so that they are all >= 0 for gather). + active &= curr_job_idx >= 0 + curr_job_idx = tf.maximum(curr_job_idx, 0) + update_indices = tf.stack([batch_range, curr_worker_idx, curr_job_idx], + axis=1) + update_indices = tf.maximum(update_indices, 0) + flip_matrix = tf.tensor_scatter_nd_add(flip_matrix, update_indices, + tf.cast(active, tf.int32)) + + return flip_matrix, active, curr_job_idx + + flip_matrix, _, _ = tf.while_loop( + _has_active_backtracks, + _backtrack_one_step, (flip_matrix, active, curr_job_idx), + back_prop=False) + + flip_matrix = tf.cast(flip_matrix, tf.bool) + assignment = tf.math.logical_xor(assignment, flip_matrix) + + return assignment + + +def _maximum_bipartite_matching(adj_matrix, assignment=None): + """Performs maximum bipartite matching using augmented paths. + + Args: + adj_matrix: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker (row) can be + matched to the job (column). + assignment: An optional bool [batch_size, num_elems, num_elems] tensor, + where each element of the inner matrix represents whether the worker has + been matched to the job. This may be a partial assignment. If specified, + this assignment will be used to seed the iterative algorithm. + + Returns: + A state dict representing the final augmenting path state search, and + a maximum bipartite matching assignment tensor. Note that the state outcome + can be used to compute a minimum vertex cover for the bipartite graph. + """ + + if assignment is None: + assignment = _greedy_assignment(adj_matrix) + + state = _find_augmenting_path(assignment, adj_matrix) + + def _has_new_jobs(state, assignment): + del assignment + return tf.reduce_any(state["new_jobs"]) + + def _improve_assignment_and_find_new_path(state, assignment): + assignment = _improve_assignment(assignment, state) + state = _find_augmenting_path(assignment, adj_matrix) + return state, assignment + + state, assignment = tf.while_loop( + _has_new_jobs, + _improve_assignment_and_find_new_path, (state, assignment), + back_prop=False) + + return state, assignment + + +def _compute_cover(state, assignment): + """Computes a cover for the bipartite graph. + + We compute a cover using the construction provided at + https://en.wikipedia.org/wiki/K%C5%91nig%27s_theorem_(graph_theory)#Proof + which uses the outcome from the alternating path search. 
+ + Args: + state: A state dict, which represents the outcome of running an augmenting + path search on the graph given the assignment. + assignment: An optional bool [batch_size, num_elems, num_elems] tensor, + where each element of the inner matrix represents whether the worker has + been matched to the job. This may be a partial assignment. If specified, + this assignment will be used to seed the iterative algorithm. + + Returns: + A tuple of (workers_cover, jobs_cover) corresponding to row and column + covers for the bipartite graph. workers_cover is a boolean tensor of shape + [batch_size, num_elems, 1] and jobs_cover is a boolean tensor of shape + [batch_size, 1, num_elems]. + """ + assigned_workers = tf.reduce_any(assignment, axis=2, keepdims=True) + assigned_jobs = tf.reduce_any(assignment, axis=1, keepdims=True) + + reachable_workers = state["workers"] > 0 + reachable_jobs = state["jobs"] > 0 + + workers_cover = assigned_workers & (~reachable_workers) + jobs_cover = assigned_jobs & reachable_jobs + + return workers_cover, jobs_cover + + +def _update_weights_using_cover(workers_cover, jobs_cover, weights): + """Updates weights for hungarian matching using a cover. + + We first find the minimum uncovered weight. Then, we subtract this from all + the uncovered weights, and add it to all the doubly covered weights. + + Args: + workers_cover: A boolean tensor of shape [batch_size, num_elems, 1]. + jobs_cover: A boolean tensor of shape [batch_size, 1, num_elems]. + weights: A float32 [batch_size, num_elems, num_elems] tensor, where each + inner matrix represents weights to be use for matching. + + Returns: + A new weight matrix with elements adjusted by the cover. + """ + max_value = tf.reduce_max(weights) + + covered = workers_cover | jobs_cover + double_covered = workers_cover & jobs_cover + + uncovered_weights = tf.where(covered, + tf.ones_like(weights) * max_value, weights) + min_weight = tf.reduce_min(uncovered_weights, axis=[-2, -1], keepdims=True) + + add_weight = tf.where(double_covered, + tf.ones_like(weights) * min_weight, + tf.zeros_like(weights)) + sub_weight = tf.where(covered, tf.zeros_like(weights), + tf.ones_like(weights) * min_weight) + + return weights + add_weight - sub_weight + + +def assert_rank(tensor, expected_rank, name=None): + """Raises an exception if the tensor rank is not of the expected rank. + + Args: + tensor: A tf.Tensor to check the rank of. + expected_rank: Python integer or list of integers, expected rank. + name: Optional name of the tensor for the error message. + + Raises: + ValueError: If the expected shape doesn't match the actual shape. + """ + expected_rank_dict = {} + if isinstance(expected_rank, int): + expected_rank_dict[expected_rank] = True + else: + for x in expected_rank: + expected_rank_dict[x] = True + + actual_rank = len(tensor.shape) + if actual_rank not in expected_rank_dict: + raise ValueError( + "For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not " + "equal to the expected tensor rank `%s`" % + (name, actual_rank, str(tensor.shape), str(expected_rank))) + + +def hungarian_matching(weights): + """Computes the minimum linear sum assignment using the Hungarian algorithm. + + Args: + weights: A float32 [batch_size, num_elems, num_elems] tensor, where each + inner matrix represents weights to be use for matching. + + Returns: + A bool [batch_size, num_elems, num_elems] tensor, where each element of the + inner matrix represents whether the worker has been matched to the job. 
+ The returned matching will always be a perfect match. + """ + batch_size, num_elems, _ = tf_utils.get_shape_list(weights, 3) + + weights = _prepare(weights) + adj_matrix = tf.equal(weights, 0.) + state, assignment = _maximum_bipartite_matching(adj_matrix) + workers_cover, jobs_cover = _compute_cover(state, assignment) + + def _cover_incomplete(workers_cover, jobs_cover, *args): + del args + cover_sum = ( + tf.reduce_sum(tf.cast(workers_cover, tf.int32)) + + tf.reduce_sum(tf.cast(jobs_cover, tf.int32))) + return tf.less(cover_sum, batch_size * num_elems) + + def _update_weights_and_match(workers_cover, jobs_cover, weights, assignment): + weights = _update_weights_using_cover(workers_cover, jobs_cover, weights) + adj_matrix = tf.equal(weights, 0.) + state, assignment = _maximum_bipartite_matching(adj_matrix, assignment) + workers_cover, jobs_cover = _compute_cover(state, assignment) + return workers_cover, jobs_cover, weights, assignment + + workers_cover, jobs_cover, weights, assignment = tf.while_loop( + _cover_incomplete, + _update_weights_and_match, + (workers_cover, jobs_cover, weights, assignment), + back_prop=False) + return weights, assignment + diff --git a/models/official/projects/maskformer/losses/matchers_test.py b/models/official/projects/maskformer/losses/matchers_test.py new file mode 100644 index 00000000..87e27430 --- /dev/null +++ b/models/official/projects/maskformer/losses/matchers_test.py @@ -0,0 +1,94 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tensorflow_models.official.projects.detr.ops.matchers.""" + +import numpy as np +from scipy import optimize +import tensorflow as tf + +# from official.projects.detr.ops import matchers +import matchers +class MatchersOpsTest(tf.test.TestCase): + + def testLinearSumAssignment(self): + """Check a simple 2D test case of the Linear Sum Assignment problem. + + Ensures that the implementation of the matching algorithm is correct + and functional on TPUs. + """ + cost_matrix = np.array([[[4, 1, 3], [2, 0, 5], [3, 2, 2]]], + dtype=np.float32) + _, adjacency_matrix = matchers.hungarian_matching(tf.constant(cost_matrix)) + adjacency_output = adjacency_matrix.numpy() + + correct_output = np.array([ + [0, 1, 0], + [1, 0, 0], + [0, 0, 1], + ], dtype=bool) + self.assertAllEqual(adjacency_output[0], correct_output) + + def testBatchedLinearSumAssignment(self): + """Check a batched case of the Linear Sum Assignment Problem. + + Ensures that a correct solution is found for all inputted problems within + a batch. 
+ """ + cost_matrix = np.array([ + [[4, 1, 3], [2, 0, 5], [3, 2, 2]], + [[1, 4, 3], [0, 2, 5], [2, 3, 2]], + [[1, 3, 4], [0, 5, 2], [2, 2, 3]], + ], + dtype=np.float32) + _, adjacency_matrix = matchers.hungarian_matching(tf.constant(cost_matrix)) + adjacency_output = adjacency_matrix.numpy() + + # Hand solved correct output for the linear sum assignment problem + correct_output = np.array([ + [[0, 1, 0], [1, 0, 0], [0, 0, 1]], + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[1, 0, 0], [0, 0, 1], [0, 1, 0]], + ], + dtype=bool) + self.assertAllClose(adjacency_output, correct_output) + + def testMaximumBipartiteMatching(self): + """Check that the maximum bipartite match assigns the correct numbers.""" + adj_matrix = tf.cast([[ + [1, 0, 0, 0, 1], + [0, 1, 0, 1, 0], + [0, 0, 1, 0, 0], + [0, 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ]], tf.bool) + _, assignment = matchers._maximum_bipartite_matching(adj_matrix) + self.assertEqual(np.sum(assignment.numpy()), 5) + + def testAssignmentMatchesScipy(self): + """Check that the Linear Sum Assignment matches the Scipy implementation.""" + batch_size, num_elems = 2, 25 + weights = tf.random.uniform((batch_size, num_elems, num_elems), + minval=0., + maxval=1.) + weights, assignment = matchers.hungarian_matching(weights) + + for idx in range(batch_size): + _, scipy_assignment = optimize.linear_sum_assignment(weights.numpy()[idx]) + hungarian_assignment = np.where(assignment.numpy()[idx])[1] + + self.assertAllEqual(hungarian_assignment, scipy_assignment) + +if __name__ == '__main__': + tf.test.main() diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits0.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits0.npy new file mode 100644 index 00000000..c1a4f3d7 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits0.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits1.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits1.npy new file mode 100644 index 00000000..dbb88e0d Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits1.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits2.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits2.npy new file mode 100644 index 00000000..e45a60fa Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits2.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits3.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits3.npy new file mode 100644 index 00000000..831d1921 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits3.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits4.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits4.npy new file mode 100644 index 00000000..d3c21f71 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits4.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks0.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks0.npy new file mode 100644 index 00000000..f323e13e Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks0.npy 
differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks1.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks1.npy new file mode 100644 index 00000000..0adb02a8 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks1.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks2.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks2.npy new file mode 100644 index 00000000..cac4ef36 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks2.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks3.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks3.npy new file mode 100644 index 00000000..f82b3d88 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks3.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks4.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks4.npy new file mode 100644 index 00000000..e0cf3356 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks4.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/images.npy b/models/official/projects/maskformer/losses/tensors/images.npy new file mode 100644 index 00000000..9a232454 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/images.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/output_pred_logits.npy b/models/official/projects/maskformer/losses/tensors/output_pred_logits.npy new file mode 100644 index 00000000..cfb07381 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/output_pred_logits.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/output_pred_masks.npy b/models/official/projects/maskformer/losses/tensors/output_pred_masks.npy new file mode 100644 index 00000000..b1c11dd3 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/output_pred_masks.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_labels_0.npy b/models/official/projects/maskformer/losses/tensors/targets_labels_0.npy new file mode 100644 index 00000000..f865d192 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_labels_0.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_labels_1.npy b/models/official/projects/maskformer/losses/tensors/targets_labels_1.npy new file mode 100644 index 00000000..af918e3f Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_labels_1.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_masks_0.npy b/models/official/projects/maskformer/losses/tensors/targets_masks_0.npy new file mode 100644 index 00000000..d82390d2 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_masks_0.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_masks_1.npy b/models/official/projects/maskformer/losses/tensors/targets_masks_1.npy new file mode 100644 index 00000000..75a78279 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_masks_1.npy differ diff --git 
a/models/official/projects/maskformer/modeling/.ipynb_checkpoints/maskformer-checkpoint.py b/models/official/projects/maskformer/modeling/.ipynb_checkpoints/maskformer-checkpoint.py new file mode 100644 index 00000000..a1f659c1 --- /dev/null +++ b/models/official/projects/maskformer/modeling/.ipynb_checkpoints/maskformer-checkpoint.py @@ -0,0 +1,113 @@ +import tensorflow as tf + +from official.vision.modeling.backbones import resnet +from official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer +from official.projects.maskformer.modeling.layers.nn_block import MLPHead +from official.projects.maskformer.modeling.decoder.transformer_pixel_decoder import TransformerFPN + +# TODO(ibrahim): Add all model parameters and remove hardcoding. +class MaskFormer(tf.keras.Model): + """MaskFormer.""" + def __init__(self, + input_specs, + fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + num_queries=100, + hidden_size=256, + num_encoder_layers=0, + num_decoder_layers=6, + dropout_rate=0.1, + backbone_endpoint_name='5', + num_classes=133, + batch_size=1, + **kwargs): + self._input_specs = input_specs + self._batch_size = batch_size + self._num_classes = num_classes + + # Pixel Decoder parameters. + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + + # DETRTransformer parameters.
+ self._num_encoder_layers = num_encoder_layers + self._num_decoder_layers = num_decoder_layers + self._num_queries = num_queries + self._hidden_size = hidden_size + self._dropout_rate = dropout_rate + self._backbone_endpoint = backbone_endpoint_name + + + super(MaskFormer, self).__init__(**kwargs) + + def build(self, image_shape): + #backbone + print("[Build MaskFormer] image shape: ", image_shape) + + self.backbone = resnet.ResNet(50, input_specs=self._input_specs, bn_trainable=False) + #decoders + self.pixel_decoder = TransformerFPN(batch_size = self._batch_size, + fpn_feat_dims=self._fpn_feat_dims, + data_format=self._data_format, + dilation_rate=self._dilation_rate, + groups=self._groups, + activation=self._activation, + use_bias=self._use_bias, + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint) + self.transformer = MaskFormerTransformer(backbone_endpoint_name=self._backbone_endpoint, + batch_size=self._batch_size, + num_queries=self._num_queries, + hidden_size=self._hidden_size, + num_encoder_layers=self._num_encoder_layers, + num_decoder_layers=self._num_decoder_layers, + dropout_rate=self._dropout_rate) + self.head = MLPHead(num_classes=self._num_classes, + hidden_dim=self._hidden_size, + mask_dim=self._fpn_feat_dims) + + super(MaskFormer, self).build(image_shape) + + def process_feature_maps(self, maps): + new_dict = {} + for k in maps.keys(): + new_dict[k[0]] = maps[k] + return new_dict + + def call(self, image): + # image = tf.reshape(image, [1, 800, 1135, 3]) + # image = tf.ones((1, 640, 640, 3)) + backbone_feature_maps = self.backbone(image) + mask_features, transformer_enc_feat = self.pixel_decoder(self.process_feature_maps(backbone_feature_maps)) + transformer_features = self.transformer({"features": transformer_enc_feat}) + seg_pred = self.head({"per_pixel_embeddings" : mask_features, + "per_segment_embeddings": transformer_features}) + + return seg_pred diff --git a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-38.pyc b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..5bed6795 Binary files /dev/null and b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc index beb927fc..b0dc2b14 100644 Binary files a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-38.pyc b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-38.pyc new file mode 100644 index 00000000..dc0cfe7d Binary files /dev/null and b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc index dbb2923a..2c8d9a59 100644 Binary files 
a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc and b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/backbone/backbone_test.py b/models/official/projects/maskformer/modeling/backbone/backbone_test.py index fdf65e8b..c07f6834 100644 --- a/models/official/projects/maskformer/modeling/backbone/backbone_test.py +++ b/models/official/projects/maskformer/modeling/backbone/backbone_test.py @@ -1,5 +1,4 @@ from absl.testing import parameterized -import math import tensorflow as tf from official.vision.modeling.backbones import resnet @@ -7,35 +6,26 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (1, 640, 640, 50), (1, 608, 911, 50) + (640, 50), ) - def test_network_creation(self, batch_size, width, height, model_id): + def test_network_creation(self, input_size, model_id): tf.keras.backend.set_image_data_format('channels_last') network = resnet.ResNet(model_id=model_id) self.assertEqual(network.count_params(), 23561152) - inputs = tf.keras.Input(shape=(width, height, 3), batch_size=1) + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) endpoints = network(inputs) - for x in endpoints.values(): - print(x.shape) - - self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**2)), int(math.ceil(height / 2**2)), 256] - , endpoints['2'].shape.as_list(), "failure on 2") - self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**3)), int(math.ceil(height / 2**3)), 512] - , endpoints['3'].shape.as_list(), "failure on 3") - + [1, 80, 80, 512] + , endpoints['3'].shape.as_list(), "failure on 3") self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**4)), int(math.ceil(height / 2**4)), 1024] - , endpoints['4'].shape.as_list(), "failure on 4") - + [1, 40, 40, 1024] + , endpoints['4'].shape.as_list(), "failure on 4") self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**5)), int(math.ceil(height / 2**5)), 2048] - , endpoints['5'].shape.as_list(), "failure on 5") + [1, 20, 20, 2048] + , endpoints['5'].shape.as_list(), "failure on 5") if __name__ == '__main__': tf.test.main() \ No newline at end of file diff --git a/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_decoder-checkpoint.py b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_decoder-checkpoint.py new file mode 100644 index 00000000..be1ac1ed --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_decoder-checkpoint.py @@ -0,0 +1,93 @@ +import math +import tensorflow as tf + +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling import transformer +from official.modeling import tf_utils +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + +class MaskFormerTransformer(tf.keras.layers.Layer): + def __init__(self, + backbone_endpoint_name, + batch_size, + num_queries, + hidden_size, + num_encoder_layers=6, + num_decoder_layers=6, + dropout_rate=0.1, + **kwargs): + super().__init__(**kwargs) + + self._backbone_endpoint_name = backbone_endpoint_name + + # Embeddings parameters. + self._batch_size = batch_size + self._num_queries = num_queries + self._hidden_size = hidden_size + if hidden_size % 2 != 0: + raise ValueError("hidden_size must be a multiple of 2.") + + + # DETRTransformer parameters. 
+ self._num_encoder_layers = num_encoder_layers + self._num_decoder_layers = num_decoder_layers + self._dropout_rate = dropout_rate + + + + def build(self, input_shape): + self._transformer = DETRTransformer(num_encoder_layers=self._num_encoder_layers, + num_decoder_layers=self._num_decoder_layers, + dropout_rate=self._dropout_rate) + + self._query_embeddings = self.add_weight( + "detr/query_embeddings", + shape=[self._num_queries, self._hidden_size], + initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.), + dtype=tf.float32) + + sqrt_k = math.sqrt(1.0 / self._hidden_size) + + self._input_proj = tf.keras.layers.Conv2D( + self._hidden_size, 1, name="detr/conv2d") + + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: + """Generates image mask from input image.""" + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) + return mask + + def call(self, inputs): + features = inputs['features'] + + mask = self._generate_image_mask(features) + + pos_embed = position_embedding_sine( + mask, num_pos_features=self._hidden_size) + pos_embed = tf.reshape(pos_embed, [self._batch_size, -1, self._hidden_size]) + + features = tf.reshape( + self._input_proj(features), [self._batch_size, -1, self._hidden_size]) + + decoded_list = self._transformer({ + "inputs": + features, + "targets": + tf.tile( + tf.expand_dims(self._query_embeddings, axis=0), + (self._batch_size, 1, 1)), + "pos_embed": pos_embed, + "mask": None, + }) + + return decoded_list + + def get_config(self): + return { + "backbone_endpoint_name": self._backbone_endpoint_name, + "num_queries": self._num_queries, + "hidden_size": self._hidden_size, + "num_encoder_layers": self._num_encoder_layers, + "num_decoder_layers": self._num_decoder_layers, + "dropout_rate": self._dropout_rate, + } diff --git a/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_pixel_decoder-checkpoint.py b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_pixel_decoder-checkpoint.py new file mode 100644 index 00000000..60f32abb --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_pixel_decoder-checkpoint.py @@ -0,0 +1,197 @@ +import tensorflow as tf +import tensorflow_addons as tfa +from official.vision.ops.spatial_transform_ops import nearest_upsampling +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling.transformer import TransformerEncoder +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + +class TransformerFPN(tf.keras.layers.Layer): + """MaskFormer Feature Pyramid Networks.""" + + def __init__(self, + batch_size = 16, + fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs): + """FPN initialization function. + Args: + fpn_feat_dims: `int`, Feature dimension of the fpn. 
+ + TODO: fill in new args + + """ + super(TransformerFPN, self).__init__(**kwargs) + + self._batch_size = batch_size + + # conv2d params + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + + + if tf.keras.backend.image_data_format() == 'channels_last': + # format: (batch_size, height, width, channels) + self._channels_last = True + else: + # format: (batch_size, channels, width, height) + self._channels_last = False + + def build(self, multilevel_features): + conv_args = { + "data_format": self._data_format, + "dilation_rate": self._dilation_rate, + "groups": self._groups, + "activation": None, + "use_bias": self._use_bias, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "activity_regularizer": self._activity_regularizer, + "kernel_constraint": self._kernel_constraint, + "bias_constraint": self._bias_constraint + } + + input_levels = list(multilevel_features.keys()) + levels = input_levels[:-1] + + self._input_proj = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"input_proj", + use_bias = True) + self._transformer_encoder = TransformerEncoder(norm_first=False, + dropout_rate = .1, + num_layers=6) + self._interpolations = [] + self._conv2d_op_lateral = [] + self._lateral_groupnorm = [] + for level in levels[::-1]: + lateral = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"lateral_{level}", + **conv_args) + lateral_norm = tf.keras.layers.GroupNormalization(name = f"lateral_norm_{level}") + interpolate = tf.keras.layers.Resizing( + multilevel_features[level][1], multilevel_features[level][2], interpolation = "nearest") + + self._conv2d_op_lateral.append(lateral) + self._lateral_groupnorm.append(lateral_norm) + self._interpolations.append(interpolate) + + self._conv2d_op_down = [] + self._down_groupnorm = [] + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = "down_initial_conv", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = "down_initial_norm") + self._down_groupnorm.append(down_norm) + self._conv2d_op_down.append(down) + + for level in levels[::-1]: + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = f"down_{level}", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = f"down_norm_{level}") + self._conv2d_op_down.append(down) + self._down_groupnorm.append(down_norm) + + self._conv2d_op_mask = tf.keras.layers.Conv2D( + filters=self._fpn_feat_dims, + kernel_size=(3, 3), + padding='same', + name = "mask_proj", + **conv_args) + + self._relu1 = tf.keras.layers.ReLU() + self._relu2 = tf.keras.layers.ReLU() + + if not self._channels_last: + self._permute1 = tf.keras.layers.Permute((2, 3, 1)) + self._permute2 = tf.keras.layers.Permute((2, 3, 1)) + + super(TransformerFPN, 
self).build(multilevel_features) + + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: + """Generates image mask from input image.""" + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) + return mask + + def call(self, multilevel_features): + """Returns the FPN features for a given multilevel features. + Args: + multilevel_features: a `dict` containing `int` keys for continuous feature + levels, e.g., [2, 3, 4, 5]. The values are corresponding features with + shape [batch_size, height_l, width_l, num_filters]. + Returns: + Mask projection + """ + input_levels = list(multilevel_features.keys()) + + feat = multilevel_features[input_levels[-1]] + + if not self._channels_last: + feat = self._permute_1(feat) + + mask = self._generate_image_mask(feat) + pos_embed = position_embedding_sine( + mask, num_pos_features=self._fpn_feat_dims) + + features = self._input_proj(feat) + + transformer = self._transformer_encoder(features, None, pos_embed) + + + down = self._conv2d_op_down[0](transformer) + down = self._down_groupnorm[0](down) + down = self._relu1(down) + + levels = input_levels[:-1] + for i, level in enumerate(levels[::-1]): + feat = multilevel_features[level] + + if not self._channels_last: + feat = self._permute_2(multilevel_features[level]) + + lateral = self._conv2d_op_lateral[i](feat) + lateral = self._lateral_groupnorm[i](lateral) + + down = self._interpolations[i](down) + lateral + + down = self._conv2d_op_down[i + 1](down) + down = self._down_groupnorm[i+1](down) + down = self._relu2(down) + + mask = self._conv2d_op_mask(down) + + return mask, transformer \ No newline at end of file diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc index 8c1dfc2e..c1bea057 100644 Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc new file mode 100644 index 00000000..1e3e4ae3 Binary files /dev/null and b/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc index 7ac47c71..f34aa7a9 100644 Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc index 43e3e156..07c69390 100644 Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc 
b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc new file mode 100644 index 00000000..2c6fb9c2 Binary files /dev/null and b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/detr_transformer.py b/models/official/projects/maskformer/modeling/decoder/detr_transformer.py new file mode 100644 index 00000000..8b3f0d50 --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/detr_transformer.py @@ -0,0 +1,83 @@ +import math +import tensorflow as tf + +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling import transformer +from official.modeling import tf_utils + +class DETRTransformer(tf.keras.layers.Layer): + """Encoder and Decoder of DETR.""" + + def __init__(self, num_encoder_layers=6, num_decoder_layers=6, + dropout_rate=0.1, **kwargs): + super().__init__(**kwargs) + self._dropout_rate = dropout_rate + self._num_encoder_layers = num_encoder_layers + self._num_decoder_layers = num_decoder_layers + + def build(self, input_shape=None): + if self._num_encoder_layers > 0: + self._encoder = transformer.TransformerEncoder( + attention_dropout_rate=self._dropout_rate, + dropout_rate=self._dropout_rate, + intermediate_dropout=self._dropout_rate, + norm_first=False, + num_layers=self._num_encoder_layers) + else: + self._encoder = None + + self._decoder = transformer.TransformerDecoder( + attention_dropout_rate=self._dropout_rate, + dropout_rate=self._dropout_rate, + intermediate_dropout=self._dropout_rate, + norm_first=False, + num_layers=self._num_decoder_layers) + super().build(input_shape) + + def get_config(self): + return { + "num_encoder_layers": self._num_encoder_layers, + "num_decoder_layers": self._num_decoder_layers, + "dropout_rate": self._dropout_rate, + } + + def call(self, inputs): + sources = inputs["inputs"] + targets = inputs["targets"] + pos_embed = inputs["pos_embed"] + mask = inputs["mask"] + input_shape = tf_utils.get_shape_list(sources) + if mask is not None: + source_attention_mask = tf.tile( + tf.expand_dims(mask, axis=1), [1, input_shape[1], 1]) + else: + source_attention_mask = None + if self._encoder is not None: + memory = self._encoder( + sources, attention_mask=source_attention_mask, pos_embed=pos_embed) + else: + memory = sources + + target_shape = tf_utils.get_shape_list(targets) + target_shape = tf.shape(targets) + + if mask is not None: + cross_attention_mask = tf.tile( + tf.expand_dims(mask, axis=1), [1, target_shape[1], 1]) + self_attention_mask=tf.ones( + (target_shape[0], target_shape[1], target_shape[1])) + else: + cross_attention_mask = None + self_attention_mask = None + + decoded = self._decoder( + tf.zeros_like(targets), + memory, + # TODO(b/199545430): self_attention_mask could be set to None when this + # bug is resolved. Passing ones for now. 
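+        # Note: the decoder input starts from zeros; the learned query embeddings are
+        # injected via `input_pos_embed` below, following the DETR formulation.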
+ self_attention_mask=self_attention_mask, + cross_attention_mask=cross_attention_mask, + return_all_decoder_outputs=False, + input_pos_embed=targets, + memory_pos_embed=pos_embed) + return decoded \ No newline at end of file diff --git a/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py b/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py index 93d1806b..fb9584b5 100644 --- a/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py +++ b/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py @@ -1,66 +1,118 @@ +"""Feature Pyramid Networks used in MaskFormer.""" import tensorflow as tf import tensorflow_addons as tfa from official.vision.ops.spatial_transform_ops import nearest_upsampling class Fpn(tf.keras.layers.Layer): - """Feature pyramid networks.""" + """MaskFormer Feature Pyramid Networks.""" def __init__(self, fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, **kwargs): """FPN initialization function. - Args: - fpn_feat_dims: Feature dimension of the fpn + fpn_feat_dims: `int`, Feature dimension of the fpn. + + TODO: fill in new args + """ super(Fpn, self).__init__(**kwargs) + # conv2d params self._fpn_feat_dims = fpn_feat_dims - # TODO(Isaac): Add Conv2D parameteres to constructor. - # TODO(Isaac): Add GroupNormalization parameters to constructor. + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + if tf.keras.backend.image_data_format() == 'channels_last': + # format: (batch_size, height, width, channels) self._channels_last = True else: + # format: (batch_size, channels, width, height) self._channels_last = False def build(self, multilevel_features): - # TODO(Isaac): Add Conv2D parameters to layers. - # TODO(Isaac): Add GroupNormalization parameters to layers. 
- + conv_args = { + "data_format": self._data_format, + "dilation_rate": self._dilation_rate, + "groups": self._groups, + "activation": self._activation, + "use_bias": self._use_bias, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "activity_regularizer": self._activity_regularizer, + "kernel_constraint": self._kernel_constraint, + "bias_constraint": self._bias_constraint + } + input_levels = list(multilevel_features.keys()) levels = input_levels[:-1] self._conv2d_op_lateral = [] - for _ in levels[::-1]: - lateral = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - kernel_size=(1, 1), - padding='same') + self._lateral_groupnorm = [] + for level in levels[::-1]: + lateral = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"lateral_{level}", + **conv_args) + lateral_norm = tf.keras.layers.GroupNormalization(name = f"lateral_norm_{level}") self._conv2d_op_lateral.append(lateral) + self._lateral_groupnorm.append(lateral_norm) self._conv2d_op_down = [] - down = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - strides=(1, 1), - kernel_size=(3, 3), - padding='same') + self._down_groupnorm = [] + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = "down_initial_conv", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = "down_initial_norm") + self._down_groupnorm.append(down_norm) self._conv2d_op_down.append(down) - for _ in levels[::-1]: - down = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - strides=(1, 1), - kernel_size=(3, 3), - padding='same') + + for level in levels[::-1]: + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = f"down_{level}", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = f"down_norm_{level}") self._conv2d_op_down.append(down) + self._down_groupnorm.append(down_norm) self._conv2d_op_mask = tf.keras.layers.Conv2D( filters=self._fpn_feat_dims, kernel_size=(3, 3), - padding='same') - - self._group_norm1 = tfa.layers.GroupNormalization() - self._group_norm2 = tfa.layers.GroupNormalization() + padding='same', + name = "mask_proj", + **conv_args) self._relu1 = tf.keras.layers.ReLU() self._relu2 = tf.keras.layers.ReLU() @@ -88,7 +140,7 @@ def call(self, multilevel_features): feat = self._permute_1(feat) down = self._conv2d_op_down[0](feat) - down = self._group_norm1(down) + down = self._down_groupnorm[0](down) down = self._relu1(down) levels = input_levels[:-1] @@ -99,17 +151,15 @@ def call(self, multilevel_features): feat = self._permute_2(multilevel_features[level]) lateral = self._conv2d_op_lateral[i](feat) - - upsample = nearest_upsampling(down, 2) - - # When width or height is odd there is a shape mismatch with scale=2. 
- if (upsample.shape != lateral.shape): - upsample = upsample[:,:lateral.shape[1],:lateral.shape[2],:] - - down = upsample + lateral + lateral = self._lateral_groupnorm[i](lateral) + down = nearest_upsampling(down, 2) + lateral down = self._conv2d_op_down[i + 1](down) - down = self._group_norm2(down) + down = self._down_groupnorm[i+1](down) down = self._relu2(down) mask = self._conv2d_op_mask(down) diff --git a/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py b/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py index 470f881b..14461e25 100644 --- a/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py +++ b/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py @@ -18,55 +18,47 @@ class FpnTest(parameterized.TestCase, tf.test.TestCase): - @parameterized.named_parameters(('test1', "coco_stuff", 256), ('test2', "coco_panoptic", 256)) - def test_pass_through(self, testcase_input_name, dim): - - testcase_backbone_inputs = { - "coco_stuff": { - "2": tf.ones([1, 160, 160, 256]), - "3": tf.ones([1, 80, 80, 512]), - "4": tf.ones([1, 40, 40, 1024]), - "5": tf.ones([1, 20, 20, 2048]) - }, - "coco_panoptic": { - "2": tf.ones([1, 152, 228, 256]), - "3": tf.ones([1, 76, 114, 512]), - "4": tf.ones([1, 38, 57, 1024]), - "5": tf.ones([1, 19, 29, 2048]) - } + @parameterized.named_parameters(('test1', 256),) + def test_pass_through(self, dim): + + multilevel_features = { + "2": tf.ones([1, 160, 160, 256]), + "3": tf.ones([1, 80, 80, 512]), + "4": tf.ones([1, 40, 40, 1024]), + "5": tf.ones([1, 20, 20, 2048]) } # TODO(Isaac): Add the additional parameters. decoder = Fpn(fpn_feat_dims=dim) - output_mask = decoder(testcase_backbone_inputs[testcase_input_name]) + output_mask = decoder(multilevel_features) - expected_output_mask = testcase_backbone_inputs[testcase_input_name]["2"].shape.as_list() + expected_output_mask = multilevel_features["2"].shape.as_list() self.assertAllEqual(output_mask.shape.as_list(), expected_output_mask) - # @combinations.generate( - # combinations.combine( - # strategy=[ - # strategy_combinations.cloud_tpu_strategy, - # strategy_combinations.one_device_strategy_gpu, - # ], - # use_sync_bn=[False, True], - # )) - # def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): - # """Test for sync bn on TPU and GPU devices.""" - - # tf.keras.backend.set_image_data_format('channels_last') - - # with strategy.scope(): - - # multilevel_features = { - # 2: tf.ones([1, 160, 160, 256]), - # 3: tf.ones([1, 80, 80, 512]), - # 4: tf.ones([1, 40, 40, 1024]), - # 5: tf.ones([1, 20, 20, 2048])} - - # decoder = Fpn() - # _ = decoder(multilevel_features) + @combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + use_sync_bn=[False, True], + )) + def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): + """Test for sync bn on TPU and GPU devices.""" + + tf.keras.backend.set_image_data_format('channels_last') + + with strategy.scope(): + + multilevel_features = { + 2: tf.ones([1, 160, 160, 256]), + 3: tf.ones([1, 80, 80, 512]), + 4: tf.ones([1, 40, 40, 1024]), + 5: tf.ones([1, 20, 20, 2048])} + + decoder = Fpn() + _ = decoder(multilevel_features) if __name__ == '__main__': diff --git a/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py
b/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py index b882e9a2..721aee39 100644 --- a/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py +++ b/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py @@ -1,16 +1,17 @@ import math import tensorflow as tf -from official.projects.detr.modeling.detr import DETRTransformer, position_embedding_sine - +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling import transformer +from official.modeling import tf_utils +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + class MaskFormerTransformer(tf.keras.layers.Layer): def __init__(self, backbone_endpoint_name, - batch_size, num_queries, hidden_size, - num_classes, - num_encoder_layers=6, + num_encoder_layers=0, num_decoder_layers=6, dropout_rate=0.1, **kwargs): @@ -19,13 +20,11 @@ def __init__(self, self._backbone_endpoint_name = backbone_endpoint_name # Embeddings parameters. - self._batch_size = batch_size self._num_queries = num_queries self._hidden_size = hidden_size if hidden_size % 2 != 0: raise ValueError("hidden_size must be a multiple of 2.") - self._num_classes = num_classes # DETRTransformer parameters. self._num_encoder_layers = num_encoder_layers @@ -46,39 +45,28 @@ def build(self, input_shape): dtype=tf.float32) sqrt_k = math.sqrt(1.0 / self._hidden_size) - - # TODO(ibrahim): NOT USED, remove num classes parameters as well. - self._class_embed = tf.keras.layers.Dense( - self._num_classes, - kernel_initializer=tf.keras.initializers.RandomUniform(-sqrt_k, sqrt_k), - name="detr/cls_dense") self._input_proj = tf.keras.layers.Conv2D( self._hidden_size, 1, name="detr/conv2d") - def _generate_image_mask(self, inputs: tf.Tensor, - target_shape: tf.Tensor) -> tf.Tensor: + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: """Generates image mask from input image.""" - mask = tf.expand_dims( - tf.cast(tf.not_equal(tf.reduce_sum(inputs, axis=-1), 0), inputs.dtype), - axis=-1) - mask = tf.image.resize( - mask, target_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) return mask def call(self, inputs): - input_image = inputs['image'] - features = inputs['features'][self._backbone_endpoint_name] + features = inputs['features'] + batch_size = features.shape[0] - mask = self._generate_image_mask(input_image, tf.shape(features)[1: 3]) + mask = self._generate_image_mask(features) pos_embed = position_embedding_sine( - mask[:, :, :, 0], num_pos_features=self._hidden_size) - pos_embed = tf.reshape(pos_embed, [self._batch_size, -1, self._hidden_size]) + mask, num_pos_features=self._hidden_size) + pos_embed = tf.reshape(pos_embed, [batch_size, -1, self._hidden_size]) features = tf.reshape( - self._input_proj(features), [self._batch_size, -1, self._hidden_size]) - mask = tf.reshape(mask, [self._batch_size, -1]) + self._input_proj(features), [batch_size, -1, self._hidden_size]) decoded_list = self._transformer({ "inputs": @@ -86,9 +74,9 @@ def call(self, inputs): "targets": tf.tile( tf.expand_dims(self._query_embeddings, axis=0), - (self._batch_size, 1, 1)), + (batch_size, 1, 1)), "pos_embed": pos_embed, - "mask": mask, + "mask": None, }) return decoded_list @@ -98,8 +86,7 @@ def get_config(self): "backbone_endpoint_name": self._backbone_endpoint_name, "num_queries": self._num_queries, "hidden_size": 
self._hidden_size, - "num_classes": self._num_classes, "num_encoder_layers": self._num_encoder_layers, "num_decoder_layers": self._num_decoder_layers, "dropout_rate": self._dropout_rate, - } \ No newline at end of file + } diff --git a/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py b/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py index 61770478..dcdcce6b 100644 --- a/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py +++ b/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py @@ -4,18 +4,22 @@ # from transformer import MaskFormerTransformer from official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer - class MaskFormerTransformerTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("test1", "coco_stuff", "5", 8, 100, 256, 171,), - ("test2", "coco_panoptic", "5", 1, 100, 256, 133,)) + @parameterized.named_parameters(('test1', '5', 8, 100, 256, 10,)) def test_pass_through(self, - testcase_input_name, - backbone_endpoint_name, - batch_size, - num_queries, - hidden_size, - num_classes): + backbone_endpoint_name, + batch_size, + num_queries, + hidden_size, + num_classes): + + multilevel_features = { + "2": tf.ones([1, 160, 160, 256]), + "3": tf.ones([1, 80, 80, 512]), + "4": tf.ones([1, 40, 40, 1024]), + "5": tf.ones([1, 20, 20, 2048]) + } transformer = MaskFormerTransformer(backbone_endpoint_name=backbone_endpoint_name, batch_size=batch_size, @@ -26,34 +30,13 @@ def test_pass_through(self, num_decoder_layers=6, dropout_rate=0.1) - testcase_input_image = { - "coco_stuff": tf.ones((1, 640, 640, 3)), - "coco_panoptic": tf.ones((1, 608, 911, 3)), - } - - testcase_backbone_inputs = { - "coco_stuff": { - "2": tf.ones([1, 160, 160, 256]), - "3": tf.ones([1, 80, 80, 512]), - "4": tf.ones([1, 40, 40, 1024]), - "5": tf.ones([1, 20, 20, 2048]) - }, - "coco_panoptic": { - "2": tf.ones([1, 152, 228, 256]), - "3": tf.ones([1, 76, 114, 512]), - "4": tf.ones([1, 38, 57, 1024]), - "5": tf.ones([1, 19, 29, 2048]) - } - } - - expected_output_shape = [6, batch_size, num_queries, 256] + input_image = tf.ones((1, 640, 640, 3)) + expected_output_shape = [6, 8, 100, 256] - output = transformer( - {"image": testcase_input_image[testcase_input_name], "features": testcase_backbone_inputs[testcase_input_name]}) + output = transformer({"image": input_image, "features": multilevel_features }) output_shape = [len(output)] + output[0].shape.as_list() self.assertAllEqual(output_shape, expected_output_shape) - -if __name__ == "__main__": +if __name__ == '__main__': tf.test.main() diff --git a/models/official/projects/maskformer/modeling/decoder/transformer_pixel_decoder.py b/models/official/projects/maskformer/modeling/decoder/transformer_pixel_decoder.py new file mode 100644 index 00000000..15794033 --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/transformer_pixel_decoder.py @@ -0,0 +1,196 @@ +import tensorflow as tf +import tensorflow_addons as tfa +from official.vision.ops.spatial_transform_ops import nearest_upsampling +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling.transformer import TransformerEncoder +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + +class TransformerFPN(tf.keras.layers.Layer): + """MaskFormer Feature Pyramid Networks.""" + + def __init__(self, + fpn_feat_dims=256, + 
data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + num_encoder_layers = 0, + **kwargs): + """FPN initialization function. + Args: + fpn_feat_dims: `int`, Feature dimension of the fpn. + + TODO: fill in new args + + """ + super(TransformerFPN, self).__init__(**kwargs) + + # conv2d params + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + self._num_encoder_layers = num_encoder_layers + + + if tf.keras.backend.image_data_format() == 'channels_last': + # format: (batch_size, height, width, channels) + self._channels_last = True + else: + # format: (batch_size, channels, width, height) + self._channels_last = False + + def build(self, multilevel_features): + conv_args = { + "data_format": self._data_format, + "dilation_rate": self._dilation_rate, + "groups": self._groups, + "activation": None, + "use_bias": self._use_bias, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "activity_regularizer": self._activity_regularizer, + "kernel_constraint": self._kernel_constraint, + "bias_constraint": self._bias_constraint + } + + input_levels = list(multilevel_features.keys()) + levels = input_levels[:-1] + + self._input_proj = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"input_proj", + use_bias = True) + self._transformer_encoder = TransformerEncoder(norm_first=False, + dropout_rate = .1, + num_layers=self._num_encoder_layers) + self._interpolations = [] + self._conv2d_op_lateral = [] + self._lateral_groupnorm = [] + for level in levels[::-1]: + lateral = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"lateral_{level}", + **conv_args) + lateral_norm = tf.keras.layers.GroupNormalization(name = f"lateral_norm_{level}") + interpolate = tf.keras.layers.Resizing( + multilevel_features[level][1], multilevel_features[level][2], interpolation = "nearest") + + self._conv2d_op_lateral.append(lateral) + self._lateral_groupnorm.append(lateral_norm) + self._interpolations.append(interpolate) + + self._conv2d_op_down = [] + self._down_groupnorm = [] + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = "down_initial_conv", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = "down_initial_norm") + self._down_groupnorm.append(down_norm) + self._conv2d_op_down.append(down) + + for level in levels[::-1]: + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = f"down_{level}", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = f"down_norm_{level}") + self._conv2d_op_down.append(down) + 
self._down_groupnorm.append(down_norm) + + self._conv2d_op_mask = tf.keras.layers.Conv2D( + filters=self._fpn_feat_dims, + kernel_size=(3, 3), + padding='same', + name = "mask_proj", + **conv_args) + + self._relu1 = tf.keras.layers.ReLU() + self._relu2 = tf.keras.layers.ReLU() + + if not self._channels_last: + self._permute1 = tf.keras.layers.Permute((2, 3, 1)) + self._permute2 = tf.keras.layers.Permute((2, 3, 1)) + + super(TransformerFPN, self).build(multilevel_features) + + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: + """Generates image mask from input image.""" + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) + return mask + + def call(self, multilevel_features): + """Returns the FPN features for a given multilevel features. + Args: + multilevel_features: a `dict` containing `int` keys for continuous feature + levels, e.g., [2, 3, 4, 5]. The values are corresponding features with + shape [batch_size, height_l, width_l, num_filters]. + Returns: + Mask projection + """ + input_levels = list(multilevel_features.keys()) + + feat = multilevel_features[input_levels[-1]] + + if not self._channels_last: + feat = self._permute_1(feat) + + mask = self._generate_image_mask(feat) + pos_embed = position_embedding_sine( + mask, num_pos_features=self._fpn_feat_dims) + + features = self._input_proj(feat) + + transformer = self._transformer_encoder(features, None, pos_embed) + + + down = self._conv2d_op_down[0](transformer) + down = self._down_groupnorm[0](down) + down = self._relu1(down) + + levels = input_levels[:-1] + for i, level in enumerate(levels[::-1]): + feat = multilevel_features[level] + + if not self._channels_last: + feat = self._permute_2(multilevel_features[level]) + + lateral = self._conv2d_op_lateral[i](feat) + lateral = self._lateral_groupnorm[i](lateral) + + down = self._interpolations[i](down) + lateral + + down = self._conv2d_op_down[i + 1](down) + down = self._down_groupnorm[i+1](down) + down = self._relu2(down) + + mask = self._conv2d_op_mask(down) + + return mask, transformer diff --git a/models/official/projects/maskformer/modeling/decoder/trasformer_decoder.pu b/models/official/projects/maskformer/modeling/decoder/trasformer_decoder.pu new file mode 100644 index 00000000..e69de29b diff --git a/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block-checkpoint.py b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block-checkpoint.py new file mode 100644 index 00000000..74cc8027 --- /dev/null +++ b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block-checkpoint.py @@ -0,0 +1,80 @@ +import tensorflow as tf + +''' +Transformer Parameters: + +enc_layers: int, +dec_layers: int, +nheads: int, +dropout: float, +dim_feedforward: int, +pre_norm: bool, +enforce_input_project: bool +''' + + +class MLPHead(tf.keras.layers.Layer): + def __init__(self, + num_classes: int, + hidden_dim: int, + # dec_supervision: bool, + mask_dim: int): + super().__init__() + + self._num_classes = num_classes + self._hidden_dim = hidden_dim + self._mask_dim = mask_dim + + def build(self, input_shape): + self._mlp = MLP(self._hidden_dim, self._hidden_dim, self._mask_dim, 3) + self._linear_classifier = tf.keras.layers.Dense(self._num_classes + 1) + # No Softmax used in their code? Need to figure out!! 
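+        # The Dense layer outputs raw logits over num_classes + 1 (the extra slot is the
+        # "no object" class); softmax is applied later, in the loss and at inference time.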
+ # self.linear_classifier = tf.keras.layers.Dense(input_shape=hidden_dim, out_dim=num_classes + 1, activation=None) + + # self.dec_supervision = dec_supervision + + def call(self, inputs): + per_pixel_embeddings = inputs['per_pixel_embeddings'] + per_segment_embeddings = inputs['per_segment_embeddings'] + + class_prob_prediction = self._linear_classifier(per_segment_embeddings) + mask_embedding = self._mlp(per_segment_embeddings) + mask_prob_prediction = tf.einsum( + "bqc,bhwc->bhwq", mask_embedding, per_pixel_embeddings) + + return {'class_prob_predictions': class_prob_prediction,'mask_prob_predictions': mask_prob_prediction} + + +class MLP(tf.keras.layers.Layer): + def __init__(self, + input_dim: int, + hidden_dim: int, + output_dim: int, + num_layers: int): + super().__init__() + + self._input_dim = input_dim + self._hidden_dim = hidden_dim + self._output_dim = output_dim + self._num_layers = num_layers + + def build(self, input_shape): + layer_dims = [(self._input_dim, self._hidden_dim)] + for _ in range(self._num_layers - 2): + layer_dims.append((self._hidden_dim, self._hidden_dim)) + layer_dims.append((self._hidden_dim, self._output_dim)) + + self._layers = [] + for i, dim in enumerate(layer_dims): + if(i < self._num_layers - 1): + self._layers.append(tf.keras.layers.Dense( + dim[1], activation=tf.nn.relu)) + else: + # Final Layer + self._layers.append( + tf.keras.layers.Dense(dim[1], activation=None)) + + def call(self, x): + for layer in self._layers: + x = layer(x) + return x diff --git a/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block_test-checkpoint.py b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block_test-checkpoint.py new file mode 100644 index 00000000..97090df9 --- /dev/null +++ b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block_test-checkpoint.py @@ -0,0 +1,29 @@ +from absl.testing import parameterized +import tensorflow as tf + +from official.projects.maskformer.modeling.layers.nn_block import MLPHead + +class MaskFormerTransformerTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("test1", 256, 256, 171)) + def test_pass_through(self, + mask_dim, + hidden_size, + num_classes): + + MLP_head = MLPHead( + num_classes=num_classes, hidden_dim=hidden_size, mask_dim=mask_dim) + + inputs = {"per_segment_embeddings": tf.ones((6, 8, 100, 256)), + "per_pixel_embeddings": tf.ones((8, 160, 160, 256))} + + expected_class_probs_shape = [8, 100, 172] + expected_mask_probs_shape = [8, 100, 160, 160] + + output = MLP_head(inputs) + + self.assertAllEqual(output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) + self.assertAllEqual(output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) + +if __name__ == '__main__': + tf.test.main() diff --git a/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc index 0bcaeb5c..015a4416 100644 Binary files a/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc b/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc index 4eb6c54c..790d7ae5 100644 Binary files 
a/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc and b/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/layers/nn_block.py b/models/official/projects/maskformer/modeling/layers/nn_block.py index 41bef080..74cc8027 100644 --- a/models/official/projects/maskformer/modeling/layers/nn_block.py +++ b/models/official/projects/maskformer/modeling/layers/nn_block.py @@ -38,11 +38,11 @@ def call(self, inputs): per_segment_embeddings = inputs['per_segment_embeddings'] class_prob_prediction = self._linear_classifier(per_segment_embeddings) - mask_embedding = self._mlp(per_segment_embeddings[-1]) + mask_embedding = self._mlp(per_segment_embeddings) mask_prob_prediction = tf.einsum( "bqc,bhwc->bhwq", mask_embedding, per_pixel_embeddings) - return {'class_prob_predictions': class_prob_prediction[-1],'mask_prob_predictions': mask_prob_prediction} + return {'class_prob_predictions': class_prob_prediction,'mask_prob_predictions': mask_prob_prediction} class MLP(tf.keras.layers.Layer): diff --git a/models/official/projects/maskformer/modeling/layers/nn_block_test.py b/models/official/projects/maskformer/modeling/layers/nn_block_test.py index 5d3e3320..97090df9 100644 --- a/models/official/projects/maskformer/modeling/layers/nn_block_test.py +++ b/models/official/projects/maskformer/modeling/layers/nn_block_test.py @@ -5,38 +5,22 @@ class MaskFormerTransformerTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("test1", "coco_stuff", 256, 256, 171, 100, 8), ("test2", "coco_panoptic", 256, 256, 133, 100, 1)) + @parameterized.named_parameters(("test1", 256, 256, 171)) def test_pass_through(self, - testcase_input_name, mask_dim, hidden_size, - num_classes, - num_queries, - batch_size): + num_classes): - mlp_head = MLPHead( + MLP_head = MLPHead( num_classes=num_classes, hidden_dim=hidden_size, mask_dim=mask_dim) - testcase_inputs = { - "coco_stuff": { - "per_segment_embeddings": tf.ones((6, 8, 100, 256)), - "per_pixel_embeddings": tf.ones((8, 160, 160, 256)) - }, - "coco_panoptic": { - "per_segment_embeddings": tf.ones((6, 1, 100, 256)), - "per_pixel_embeddings": tf.ones((1, 152, 228, 256)) - } - } - # expected_class_probs_shape = [8, 100, 172] - expected_class_probs_shape = [batch_size, num_queries, num_classes + 1] - - # expected_mask_probs_shape = [8, 100, 160, 160] - expected_mask_probs_shape = [batch_size, - testcase_inputs[testcase_input_name]["per_pixel_embeddings"].shape[1], - testcase_inputs[testcase_input_name]["per_pixel_embeddings"].shape[2], - num_queries] - - output = mlp_head(testcase_inputs[testcase_input_name]) + inputs = {"per_segment_embeddings": tf.ones((6, 8, 100, 256)), + "per_pixel_embeddings": tf.ones((8, 160, 160, 256))} + + expected_class_probs_shape = [8, 100, 172] + expected_mask_probs_shape = [8, 100, 160, 160] + + output = MLP_head(inputs) self.assertAllEqual(output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) self.assertAllEqual(output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) diff --git a/models/official/projects/maskformer/modeling/maskformer.py b/models/official/projects/maskformer/modeling/maskformer.py index efa81667..f0fb1211 100644 --- a/models/official/projects/maskformer/modeling/maskformer.py +++ b/models/official/projects/maskformer/modeling/maskformer.py @@ -2,46 +2,118 @@ from official.vision.modeling.backbones import resnet from 
official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer -from official.projects.maskformer.modeling.decoder.pixel_decoder import Fpn from official.projects.maskformer.modeling.layers.nn_block import MLPHead +from official.projects.maskformer.modeling.decoder.transformer_pixel_decoder import TransformerFPN # TODO(ibrahim): Add all parameters model parameters and remove hardcoding. class MaskFormer(tf.keras.Model): - def __init__(self, num_classes, num_queries, **kwargs): - super().__init__(**kwargs) - self.num_classes = num_classes - self.num_queries = num_queries + """Maskformer""" + def __init__(self, + input_specs, + fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + num_queries=100, + hidden_size=256, + fpn_encoder_layers=6, + detr_encoder_layers=0, + num_decoder_layers=6, + dropout_rate=0.1, + backbone_endpoint_name='5', + num_classes=133, + batch_size=1, + **kwargs): + self._input_specs = input_specs + self._batch_size = batch_size + self._num_classes = num_classes - def build(self, input_shape): - self._backbone = resnet.ResNet(50) - self._transformer_decoder = MaskFormerTransformer(backbone_endpoint_name='5', - batch_size=1, - num_queries=self.num_queries, - hidden_size=256, - num_classes=self.num_classes, - num_encoder_layers=0, - num_decoder_layers=6, - dropout_rate=0.1) + # Pixel Deocder paramters. + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint - self._pixel_decoder = Fpn(fpn_feat_dims=256) - self._MLP_head = MLPHead( - num_classes=self.num_classes, hidden_dim=256, mask_dim=256) + # DETRTransformer parameters. 
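+    # fpn_encoder_layers configures the TransformerEncoder inside the pixel decoder;
+    # detr_encoder_layers / num_decoder_layers configure the DETR-style transformer head.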
+ self._fpn_encoder_layers = fpn_encoder_layers + self._detr_encoder_layers = detr_encoder_layers + self._num_decoder_layers = num_decoder_layers + self._num_queries = num_queries + self._hidden_size = hidden_size + self._dropout_rate = dropout_rate + self._backbone_endpoint = backbone_endpoint_name + - def call(self, inputs): - feature_maps = self._backbone(inputs) - for i, x in feature_maps.items(): - print(i, " - ", x.shape) + super(MaskFormer, self).__init__(**kwargs) - per_segment_embeddings = self._transformer_decoder( - {"image": inputs, "features": feature_maps}) - print("\n\nper_segment_embeddings:", tf.shape(per_segment_embeddings)) + def build(self, image_shape): + #backbone + print("[Build MaskFormer] image shape: ", image_shape) + self.backbone = resnet.ResNet(50, input_specs=self._input_specs, bn_trainable=False) + #decoders + self.pixel_decoder = TransformerFPN(batch_size = self._batch_size, + fpn_feat_dims=self._fpn_feat_dims, + data_format=self._data_format, + dilation_rate=self._dilation_rate, + groups=self._groups, + activation=self._activation, + use_bias=self._use_bias, + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint, + num_encoder_layers = self._fpn_encoder_layers) + self.transformer = MaskFormerTransformer(backbone_endpoint_name=self._backbone_endpoint, + batch_size=self._batch_size, + num_queries=self._num_queries, + hidden_size=self._hidden_size, + num_encoder_layers=self._detr_encoder_layers, + num_decoder_layers=self._num_decoder_layers, + dropout_rate=self._dropout_rate) + self.head = MLPHead(num_classes=self._num_classes, + hidden_dim=self._hidden_size, + mask_dim=self._fpn_feat_dims) + + #self.panoptic_interpolate = tf.keras.layers.Resizing( + # image_shape[1], image_shape[2], interpolation = "bilinear") + super(MaskFormer, self).build(image_shape) + + def process_feature_maps(self, maps): + new_dict = {} + for k in maps.keys(): + new_dict[k[0]] = maps[k] + return new_dict - per_pixel_embeddings = self._pixel_decoder(feature_maps) - print("\n\nper_pixel_embeddings:", tf.shape(per_pixel_embeddings)) - - class_and_mask_probs = self._MLP_head( - {'per_pixel_embeddings': per_pixel_embeddings, 'per_segment_embeddings': tf.stack(per_segment_embeddings)}) - - print("\n\nClass:", tf.shape(class_and_mask_probs["class_prob_predictions"])) - print("\n\nMask:", tf.shape(class_and_mask_probs["mask_prob_predictions"])) - return class_and_mask_probs + def call(self, image, training = False): + # image = tf.reshape(image, [1, 800, 1135, 3]) + # image = tf.ones((1, 640, 640, 3)) + backbone_feature_maps = self.backbone(image) + mask_features, transformer_enc_feat = self.pixel_decoder(self.process_feature_maps(backbone_feature_maps)) + transformer_features = self.transformer({"features": transformer_enc_feat}) + + seg_pred = self.head({"per_pixel_embeddings" : mask_features, + "per_segment_embeddings": transformer_features}) + #if not training: + # seg_pred["pred_masks"] = self.panoptic_interpolate(seg_pred["pred_masks"]) + return seg_pred diff --git a/models/official/projects/maskformer/modeling/maskformer_test.py b/models/official/projects/maskformer/modeling/maskformer_test.py index 962df813..41c91d0e 100644 --- a/models/official/projects/maskformer/modeling/maskformer_test.py +++ 
b/models/official/projects/maskformer/modeling/maskformer_test.py @@ -1,42 +1,46 @@ -from official.projects.maskformer.maskformer import MaskFormer +from official.projects.maskformer.modeling.maskformer import MaskFormer from absl.testing import parameterized import tensorflow as tf class MaskFormerTest(tf.test.TestCase, parameterized.TestCase): # TODO(ibrahim): Add more testcases. - @parameterized.named_parameters(('test1', 'coco_stuff', 100, 171), ('test2', 'coco_panoptic', 100, 133)) - def test_pass_through(self, testcase_input_name, num_queries, num_classes): - - model = MaskFormer(num_queries=num_queries, num_classes=num_classes) - - # input_image = tf.ones((1, 640, 640, 3)) - testcase_input = { - "coco_stuff": tf.ones((1, 640, 640, 3)), - "coco_panoptic": tf.ones((1, 608, 911, 3)) - } + @parameterized.named_parameters(('test1', 256, 100, 256, "5", 6, 0, 6, 133, 1)) + def test_pass_through(self, + fpn_feat_dims, + num_queries, + hidden_size, + backbone_endpoint_name, + fpn_encoder_layers, + detr_encoder_layers, + num_decoder_layers, + num_classes, + batch_size): + input_specs = tf.keras.layers.InputSpec(shape=[None] + + [640, 640, 3]) + maskformer = MaskFormer(input_specs= input_specs, hidden_size=hidden_size, + backbone_endpoint_name=backbone_endpoint_name, + fpn_encoder_layers=fpn_encoder_layers, + detr_encoder_layers=detr_encoder_layers, + num_decoder_layers=num_decoder_layers, + num_classes=num_classes, + batch_size=batch_size) + + input_image = tf.ones((1, 640, 640, 3)) - # TODO(ibrahim): Add num_queries and make expected output shape dynamic after adding parameters. # expected_class_probs_shape = [1, 100, 172] # expected_mask_probs_shape = [1, 160, 160, 100] + expected_class_probs_shape = [1, 100, 134] # B, dim of logits, number of classes + expected_mask_probs_shape = [1, 160, 160, 100] # B,H,W,C - testcases_expected_output = { - "coco_stuff": { - "class_prob_predictions": [1, 100, 172], - "mask_prob_predictions": [1, 160, 160, 100] - }, - "coco_panoptic": { - "class_prob_predictions": [1, num_queries, 134], - "mask_prob_predictions": [1, 152, 228, num_queries] - } - } - - output = model(testcase_input[testcase_input_name]) - + output = maskformer(input_image) self.assertAllEqual( - output["class_prob_predictions"].shape.as_list(), testcases_expected_output[testcase_input_name]["class_prob_predictions"]) + output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) self.assertAllEqual( - output["mask_prob_predictions"].shape.as_list(), testcases_expected_output[testcase_input_name]["mask_prob_predictions"]) + output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) if __name__ == '__main__': tf.test.main() + diff --git a/models/official/projects/maskformer/optimization.py b/models/official/projects/maskformer/optimization.py new file mode 100644 index 00000000..062dac1c --- /dev/null +++ b/models/official/projects/maskformer/optimization.py @@ -0,0 +1,147 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Customized optimizer to match paper results.""" + +import dataclasses +import tensorflow as tf +from official.modeling import optimization +from official.nlp import optimization as nlp_optimization + + +@dataclasses.dataclass +class DETRAdamWConfig(optimization.AdamWeightDecayConfig): + pass + + +@dataclasses.dataclass +class OptimizerConfig(optimization.OptimizerConfig): + detr_adamw: DETRAdamWConfig = DETRAdamWConfig() + + +@dataclasses.dataclass +class OptimizationConfig(optimization.OptimizationConfig): + """Configuration for optimizer and learning rate schedule. + + Attributes: + optimizer: optimizer oneof config. + ema: optional exponential moving average optimizer config, if specified, ema + optimizer will be used. + learning_rate: learning rate oneof config. + warmup: warmup oneof config. + """ + optimizer: OptimizerConfig = OptimizerConfig() + + +# TODO(frederickliu): figure out how to make this configuable. +# TODO(frederickliu): Study if this is needed. +class _DETRAdamW(nlp_optimization.AdamWeightDecay): + """Custom AdamW to support different lr scaling for backbone. + + The code is copied from AdamWeightDecay and Adam with learning scaling. + """ + + def _resource_apply_dense(self, grad, var, apply_state=None): + lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state) + apply_state = kwargs['apply_state'] + if 'detr' not in var.name: + lr_t *= 0.1 + decay = self._decay_weights_op(var, lr_t, apply_state) + with tf.control_dependencies([decay]): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = ((apply_state or {}).get((var_device, var_dtype)) + or self._fallback_apply_state(var_device, var_dtype)) + + m = self.get_slot(var, 'm') + v = self.get_slot(var, 'v') + lr = coefficients[ + 'lr_t'] * 0.1 if 'detr' not in var.name else coefficients['lr_t'] + + if not self.amsgrad: + return tf.raw_ops.ResourceApplyAdam( + var=var.handle, + m=m.handle, + v=v.handle, + beta1_power=coefficients['beta_1_power'], + beta2_power=coefficients['beta_2_power'], + lr=lr, + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, + use_locking=self._use_locking) + else: + vhat = self.get_slot(var, 'vhat') + return tf.raw_ops.ResourceApplyAdamWithAmsgrad( + var=var.handle, + m=m.handle, + v=v.handle, + vhat=vhat.handle, + beta1_power=coefficients['beta_1_power'], + beta2_power=coefficients['beta_2_power'], + lr=lr, + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, + use_locking=self._use_locking) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state) + apply_state = kwargs['apply_state'] + if 'detr' not in var.name: + lr_t *= 0.1 + decay = self._decay_weights_op(var, lr_t, apply_state) + with tf.control_dependencies([decay]): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = ((apply_state or {}).get((var_device, var_dtype)) + or self._fallback_apply_state(var_device, var_dtype)) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, 'm') + m_scaled_g_values = grad * coefficients['one_minus_beta_1_t'] + m_t = tf.compat.v1.assign(m, m * coefficients['beta_1_t'], + use_locking=self._use_locking) + with tf.control_dependencies([m_t]): + m_t = self._resource_scatter_add(m, indices, m_scaled_g_values) 
+ + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = self.get_slot(var, 'v') + v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t'] + v_t = tf.compat.v1.assign(v, v * coefficients['beta_2_t'], + use_locking=self._use_locking) + with tf.control_dependencies([v_t]): + v_t = self._resource_scatter_add(v, indices, v_scaled_g_values) + lr = coefficients[ + 'lr_t'] * 0.1 if 'detr' not in var.name else coefficients['lr_t'] + if not self.amsgrad: + v_sqrt = tf.sqrt(v_t) + var_update = tf.compat.v1.assign_sub( + var, lr * m_t / (v_sqrt + coefficients['epsilon']), + use_locking=self._use_locking) + return tf.group(*[var_update, m_t, v_t]) + else: + v_hat = self.get_slot(var, 'vhat') + v_hat_t = tf.maximum(v_hat, v_t) + with tf.control_dependencies([v_hat_t]): + v_hat_t = tf.compat.v1.assign( + v_hat, v_hat_t, use_locking=self._use_locking) + v_hat_sqrt = tf.sqrt(v_hat_t) + var_update = tf.compat.v1.assign_sub( + var, + lr* m_t / (v_hat_sqrt + coefficients['epsilon']), + use_locking=self._use_locking) + return tf.group(*[var_update, m_t, v_t, v_hat_t]) + +optimization.register_optimizer_cls('detr_adamw', _DETRAdamW) diff --git a/models/official/projects/maskformer/readme.md b/models/official/projects/maskformer/readme.md index 0e305431..d1dbc5cc 100644 --- a/models/official/projects/maskformer/readme.md +++ b/models/official/projects/maskformer/readme.md @@ -1,30 +1,21 @@ -# MaskFormer: Per-Pixel Classification is Not All You Need for Semantic Segmentation -TensorFlow 2 implementation of MaskFormer: Per-Pixel Classification is Not All You Need for Semantic Segmentation - - -export PYTHONPATH=$PYTHONPATH:/depot/qqiu/data/vishal/projects/tf_maskformer_debug/models +``` +module load cuda/11.7.0 cudnn/cuda-11.7_8.6 gcc/6.3.0 +export PYTHONPATH=$PYTHONPATH: +``` -## Imp paths -code path - /depot/qqiu/data/vishal/projects/tf_maskformer_debug/models/official/projects/maskformer/ckpts -data path - /depot/davisjam/data/vishal/datasets/coco ## Environment creation +``` conda create -n tfmaskformer -conda activate /depot/qqiu/data/vishal/envs/tmaskformer -pip install -r requirements.txt - -## Dataset Download and Prep +conda activate tfmaskformer +pip install -r /models/official/requirements.txt +pip install tensorflow-text-nightly ``` -chmod +x ./data/create_tf_records.sh -cd /depot/qqiu/data/vishal/projects/tf_maskformer_integration/official/projects/maskformer/data -./create_tf_records.sh /depot/davisjam/data/vishal/datasets/coco +## To start training ``` -module load cuda/11.7.0 cudnn/cuda-11.7_8.6 gcc/6.3.0 - - -# For debugging the code python3 official/projects/maskformer/train.py \ --experiment=maskformer_coco_panoptic \ - --mode=train_and_eval \ - --model_dir=/depot/qqiu/data/vishal/projects/tf_maskformer_debug/models/official/projects/maskformer/ckpts \ \ No newline at end of file + --mode=train \ + --model_dir= \ +``` \ No newline at end of file diff --git a/models/official/projects/maskformer/requirements.txt b/models/official/projects/maskformer/requirements.txt index 91cd6172..addfe13e 100644 --- a/models/official/projects/maskformer/requirements.txt +++ b/models/official/projects/maskformer/requirements.txt @@ -1,4 +1,4 @@ -tensorflow +tensorflow==2.11.0 pyyaml gin-config==0.1.1 tensorflow_addons diff --git a/models/official/projects/maskformer/tasks/.ipynb_checkpoints/panoptic_maskformer-checkpoint.py b/models/official/projects/maskformer/tasks/.ipynb_checkpoints/panoptic_maskformer-checkpoint.py new file mode 100644 index 00000000..4fb99775 --- /dev/null +++ 
b/models/official/projects/maskformer/tasks/.ipynb_checkpoints/panoptic_maskformer-checkpoint.py @@ -0,0 +1,214 @@ +import tensorflow as tf + +from official.core import base_task +from official.core import task_factory +from typing import Any, Dict, List, Mapping, Optional, Tuple + +from official.projects.maskformer.dataloaders import input_reader +from official.vision.dataloaders import input_reader_factory +from official.common import dataset_fn + +from official.projects.maskformer.configs import maskformer as exp_cfg +from official.projects.maskformer.modeling.maskformer import MaskFormer +from official.projects.maskformer.losses.maskformer_losses import Loss +from official.projects.maskformer.dataloaders import panoptic_input + +from official.projects.detr.ops.matchers import hungarian_matching +from official.projects.maskformer.losses.maskformer_losses import Loss + +import numpy as np +from loguru import logger + +@task_factory.register_task_cls(exp_cfg.MaskFormerTask) +class PanopticTask(base_task.Task): + + def build_model(self)-> tf.keras.Model: + """Builds MaskFormer Model.""" + # TODO : Remove hardcoded values, Verify the number of classes + input_specs = tf.keras.layers.InputSpec(shape=[None] + + [640, 640, 3]) + + model = MaskFormer(input_specs= input_specs, hidden_size=256, + backbone_endpoint_name="5", + num_encoder_layers=0, + num_decoder_layers=6, + num_classes=133, + batch_size=8) + + return model + + def build_inputs(self, params, input_context: Optional[tf.distribute.InputContext] = None) -> tf.data.Dataset: + """ + Build panoptic segmentation dataset. + + """ + + # tf.profiler.experimental.server.start(6000) + if params.decoder.type == 'simple_decoder': + decoder = panoptic_input.TfExampleDecoder(regenerate_source_id = params.regenerate_source_id) + else: + raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type)) + + parser = panoptic_input.mask_former_parser(params.parser, is_training = params.is_training, decoder_fn=decoder.decode) + reader = input_reader.InputFn(params,dataset_fn = dataset_fn.pick_dataset_fn(params.file_type),parser_fn = parser) + dataset = reader(ctx=input_context) + # for sample in dataset.take(1): + # print(f"unique ids : {sample[1]['unique_ids']}") + # print("individual masks :", sample[1]["individual_masks"].shape) + # print(f"image shape : {sample[0].shape}") + # logger.debug(f"category_mask : {sample[1]['category_mask'].shape}") + # logger.debug(f"mask_labels :{sample[1]['mask_labels']}") + # logger.debug(f"instance_mask:{sample[1]['instance_mask'].shape}") + # print(sample[1]["instance_centers_heatmap"].shape) + # print(sample[1]["instance_centers_offset"].shape) + # print(sample[1]["semantic_weights"].shape) + # print(sample[1]["valid_mask"].shape) + # print(sample[1]["things_mask"].shape) + + # exit() + + return dataset + + def initialize(self, model: tf.keras.Model) -> None: + """ + Used to initialize the models with checkpoint + """ + #TODO : R50 checkpoint + pass + + def build_losses(self, output, labels, aux_outputs=None): + # TODO : Auxilary outputs + outputs = {"pred_logits": output["class_prob_predictions"], "pred_masks": output["mask_prob_predictions"]} + targets = labels + # print("pred_logits : ", outputs["pred_logits"].shape) + # print("mask_prob_predictions : ", outputs["pred_masks"].shape) + + matcher = hungarian_matching + no_object_weight = 0.1 + # TODO : Remove hardcoded values, number of classes + loss = Loss( + num_classes = 133, + matcher = matcher, + eos_coef = no_object_weight, + cost_class= 1.0, + 
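+            # The weights used here (class 1.0, dice 1.0, focal 20.0, eos_coef 0.1) mirror the
+            # MaskFormer reference configuration; eos_coef down-weights the "no object" class.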
cost_dice= 1.0, + cost_focal=20.0 + ) + + calculated_losses = loss(outputs, targets) + + # Losses are returned as weighted sum of individual losses + total_loss = calculated_losses['loss_ce'] + calculated_losses['loss_dice'] + calculated_losses['loss_focal'] + + weighted_ce = calculated_losses['loss_ce'] + weighted_focal = calculated_losses['loss_dice'] + weighted_dice = calculated_losses['loss_focal'] + + # Not implemented auxilary outputs + # if aux_outputs is not None: + # total_aux_loss = 0.0 + # # TODO : Remove hardcoding + # for i in range(4): #4 number of auxilary outputs + # total_aux_loss += calculated_losses['loss_ce_'+str(i)] + calculated_losses['loss_dice_'+str(i)] + calculated_losses['loss_focal_'+str(i)] + # total_loss = total_loss + total_aux_loss + + + return total_loss, weighted_ce, weighted_focal, weighted_dice + + def build_metrics(self, training=True): + """Builds panoptic metrics.""" + metrics = [] + metric_names = ['cls_loss', 'focal_loss', 'dice_loss'] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + # TODO : Need panoptic quality metric for evaluation + + return metrics + + + + def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None) -> Dict[str, Any]: + """ + Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + with tf.GradientTape() as tape: + outputs = model(features, training=True) + + loss = 0.0 + cls_loss = 0.0 + focal_loss = 0.0 + dice_loss = 0.0 + + ########################################################## + # TODO : Need to use this for TPU training when we use mirrored startegy + + # print(outputs.shape) + # exit() + # for output in outputs: + # # Computes per-replica loss. + + # total_loss, cls_loss_, focal_loss_, dice_loss_ = self.build_losses( + # output=output, labels=labels) + # loss += total_loss + # cls_loss += cls_loss_ + # focal_loss += focal_loss_ + # dice_loss += dice_loss_ + + # scaled_loss = loss + # # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # # scaled for numerical stability. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + ########################################################################## + + # TODO : Add auxiallary losses + total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels) + + tvars = model.trainable_variables + + grads = tape.gradient(total_loss, tvars) + + #################################################################### + # Do not use mixed precision for now + # # Scales back gradient when LossScaleOptimizer is used. + + optimizer.apply_gradients(list(zip(grads, tvars))) + + # # Multiply for logging. + # # Since we expect the gradient replica sum to happen in the optimizer, + # # the loss is scaled with global num_boxes and weights. + # # To have it more interpretable/comparable we scale it back when logging. 
+ num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync + total_loss *= num_replicas_in_sync + cls_loss *= num_replicas_in_sync + focal_loss *= num_replicas_in_sync + dice_loss *= num_replicas_in_sync + ##################################################################### + # # Trainer class handles loss metric for you. + logs = {self.loss: total_loss} + + all_losses = { + 'cls_loss': cls_loss, + 'focal_loss': focal_loss, + 'dice_loss': dice_loss, + } + + + # # Metric results will be added to logs for you. + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + return logs + + def validation_step(self, inputs, model, optimizer, metrics=None): + pass diff --git a/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc b/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc index 08fccbe2..449dc4ae 100644 Binary files a/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc and b/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/tasks/panoptic_maskformer.py b/models/official/projects/maskformer/tasks/panoptic_maskformer.py index c5f5523e..411082b1 100644 --- a/models/official/projects/maskformer/tasks/panoptic_maskformer.py +++ b/models/official/projects/maskformer/tasks/panoptic_maskformer.py @@ -1,97 +1,258 @@ import tensorflow as tf from official.core import base_task +from official.core import task_factory from typing import Any, Dict, List, Mapping, Optional, Tuple + +from official.projects.maskformer.dataloaders import input_reader +from official.vision.dataloaders import input_reader_factory +from official.common import dataset_fn + +from official.projects.maskformer.configs import maskformer as exp_cfg from official.projects.maskformer.modeling.maskformer import MaskFormer from official.projects.maskformer.losses.maskformer_losses import Loss +from official.projects.maskformer.losses.inference import PanopticInference from official.projects.maskformer.dataloaders import panoptic_input -from official.vision.dataloaders import input_reader -from official.vision.dataloaders import input_reader_factory -from official.common import dataset_fn +from official.projects.detr.ops.matchers import hungarian_matching + +import numpy as np +from loguru import logger + +@task_factory.register_task_cls(exp_cfg.MaskFormerTask) class PanopticTask(base_task.Task): def build_model(self)-> tf.keras.Model: """Builds MaskFormer Model.""" - # TODO(ibrahim): Connect to params in config. 
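Editor's note: the block above multiplies the per-replica mean losses by num_replicas_in_sync purely for logging, so the reported values stay comparable across different replica counts. A small sketch of that step, assuming the losses are scalar tensors computed inside the distributed step; scale_for_logging is a hypothetical helper.

    import tensorflow as tf

    def scale_for_logging(losses):
      """Rescales per-replica loss scalars for logging under tf.distribute."""
      num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
      return {name: value * num_replicas for name, value in losses.items()}

    # Usage inside train_step, after build_losses:
    # logs = scale_for_logging({'total_loss': total_loss, 'cls_loss': cls_loss,
    #                           'focal_loss': focal_loss, 'dice_loss': dice_loss})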
- model = MaskFormer() + # TODO : Remove hardcoded values, Verify the number of classes + input_specs = tf.keras.layers.InputSpec(shape=[None] + + self._task_config.model.input_size) + + model = MaskFormer(input_specs= input_specs, + num_queries=self._task_config.model.num_queries, + hidden_size=self._task_config.model.hidden_size, + backbone_endpoint_name=self._task_config.model.backbone_endpoint_name, + fpn_encoder_layers=self._task_config.model.fpn_encoder_layers, + detr_encoder_laters=self._task_config.model.detr_encoder_layers, + num_decoder_layers=self._task_config.model.num_decoder_layers, + num_classes=self._task_config.model.num_classes, + ) return model + + def initialize(self, model: tf.keras.Model) -> None: + """ + Used to initialize the models with checkpoint + """ + """Loading pretrained checkpoint.""" + if not self._task_config.init_checkpoint: + return + + ckpt_dir_or_file = self._task_config.init_checkpoint + + # Restoring checkpoint. + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + if self._task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self._task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) def build_inputs(self, params, input_context: Optional[tf.distribute.InputContext] = None) -> tf.data.Dataset: """ Build panoptic segmentation dataset. - """ - pass + + # tf.profiler.experimental.server.start(6000) + if params.decoder.type == 'simple_decoder': + decoder = panoptic_input.TfExampleDecoder(regenerate_source_id = params.regenerate_source_id) + else: + raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type)) + + parser = panoptic_input.mask_former_parser(params.parser, is_training = params.is_training, decoder_fn=decoder.decode) + reader = input_reader.InputFn(params,dataset_fn = dataset_fn.pick_dataset_fn(params.file_type),parser_fn = parser) + dataset = reader(ctx=input_context) + + return dataset + + + + def build_losses(self, output, labels, aux_outputs=None): + # TODO : Auxilary outputs + outputs = {"pred_logits": output["class_prob_predictions"], "pred_masks": output["mask_prob_predictions"]} + targets = labels + # print("pred_logits : ", outputs["pred_logits"].shape) + # print("mask_prob_predictions : ", outputs["pred_masks"].shape) + + matcher = hungarian_matching + no_object_weight = self._task_config.losses.no_object_weight + # TODO : Remove hardcoded values, number of classes + loss = Loss( + num_classes = self._task_config.model.num_classes, + matcher = matcher, + eos_coef = no_object_weight, + cost_class= self._task_config.losses.cost_class, + cost_dice= self._task_config.losses.cost_dice, + cost_focal= self._task_config.losses.cost_focal + ) - def build_losses(self, class_prob_outputs, mask_prob_outputs, class_targets, mask_targets): - outputs = {"pred_logits": class_prob_outputs, "pred_masks": mask_prob_outputs} - targets = {"labels": class_targets, "masks": mask_targets} + calculated_losses = loss(outputs, targets) - # _compute_loss = Loss(init loss here...) 
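Editor's note: the new initialize() above restores either the whole model or only the backbone from task_config.init_checkpoint; it calls logging.info, which presumes `from absl import logging` is imported at the top of the file. A standalone sketch of the backbone-only path is below, assuming the model exposes a `.backbone` attribute as in the code above.

    import tensorflow as tf
    from absl import logging

    def restore_backbone(model, ckpt_dir_or_file):
      """Restores only the backbone weights, mirroring the 'backbone' branch above."""
      if tf.io.gfile.isdir(ckpt_dir_or_file):
        ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
      ckpt = tf.train.Checkpoint(backbone=model.backbone)
      status = ckpt.restore(ckpt_dir_or_file)
      # expect_partial() silences warnings about head variables that are not
      # present in a backbone-only (e.g. classification) checkpoint.
      status.expect_partial().assert_existing_objects_matched()
      logging.info('Restored backbone from %s', ckpt_dir_or_file)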
- # return _compute_loss(outputs, targets) - raise NotImplementedError + # Losses are returned as weighted sum of individual losses + total_loss = calculated_losses['loss_ce'] + calculated_losses['loss_dice'] + calculated_losses['loss_focal'] + + weighted_ce = calculated_losses['loss_ce'] + weighted_focal = calculated_losses['loss_dice'] + weighted_dice = calculated_losses['loss_focal'] + + # Not implemented auxilary outputs + # if aux_outputs is not None: + # total_aux_loss = 0.0 + # # TODO : Remove hardcoding + # for i in range(4): #4 number of auxilary outputs + # total_aux_loss += calculated_losses['loss_ce_'+str(i)] + calculated_losses['loss_dice_'+str(i)] + calculated_losses['loss_focal_'+str(i)] + # total_loss = total_loss + total_aux_loss + + + return total_loss, weighted_ce, weighted_focal, weighted_dice def build_metrics(self, training=True): - raise NotImplementedError - + """Builds panoptic metrics.""" + metrics = [] + metric_names = ['cls_loss', 'focal_loss', 'dice_loss'] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + # TODO : Need panoptic quality metric for evaluation + + return metrics + + + def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None) -> Dict[str, Any]: + """ + Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ features, labels = inputs with tf.GradientTape() as tape: outputs = model(features, training=True) - - #TODO Change to maskformer loss + loss = 0.0 cls_loss = 0.0 - box_loss = 0.0 - giou_loss = 0.0 - - for output in outputs: - # Computes per-replica loss. - layer_loss, layer_cls_loss, layer_box_loss, layer_giou_loss = self.build_losses( - outputs=output, labels=labels, aux_losses=model.losses) - loss += layer_loss - cls_loss += layer_cls_loss - box_loss += layer_box_loss - giou_loss += layer_giou_loss + focal_loss = 0.0 + dice_loss = 0.0 + + ########################################################## + # TODO : Need to use this for TPU training when we use mirrored startegy + + # print(outputs.shape) + # exit() + # for output in outputs: + # # Computes per-replica loss. + + # total_loss, cls_loss_, focal_loss_, dice_loss_ = self.build_losses( + # output=output, labels=labels) + # loss += total_loss + # cls_loss += cls_loss_ + # focal_loss += focal_loss_ + # dice_loss += dice_loss_ - scaled_loss = loss - # For mixed_precision policy, when LossScaleOptimizer is used, loss is - # scaled for numerical stability. + # scaled_loss = loss + # # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # # scaled for numerical stability. if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): scaled_loss = optimizer.get_scaled_loss(scaled_loss) - - tvars = model.trainable_variables - grads = tape.gradient(scaled_loss, tvars) - # Scales back gradient when LossScaleOptimizer is used. - if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): - grads = optimizer.get_unscaled_gradients(grads) - optimizer.apply_gradients(list(zip(grads, tvars))) - - # Multiply for logging. - # Since we expect the gradient replica sum to happen in the optimizer, - # the loss is scaled with global num_boxes and weights. - # To have it more interpretable/comparable we scale it back when logging. 
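Editor's note: the removed DETR-style block shows the usual mixed-precision recipe (scale the loss inside the tape, unscale the gradients before applying), while the retained `optimizer.get_scaled_loss(scaled_loss)` call above references a `scaled_loss` that is never assigned once the surrounding loop is commented out. A self-contained sketch of the complete pattern, assuming loss scaling is re-enabled; `loss_fn` is a hypothetical stand-in for self.build_losses.

    import tensorflow as tf

    def train_step_with_loss_scaling(model, optimizer, features, labels, loss_fn):
      """Mixed-precision update: scale the loss inside the tape, unscale the grads."""
      is_scaled = isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
      with tf.GradientTape() as tape:
        outputs = model(features, training=True)
        loss = loss_fn(outputs, labels)
        scaled_loss = optimizer.get_scaled_loss(loss) if is_scaled else loss
      tvars = model.trainable_variables
      grads = tape.gradient(scaled_loss, tvars)
      if is_scaled:
        # Undo the loss scale so the optimizer sees true gradients.
        grads = optimizer.get_unscaled_gradients(grads)
      optimizer.apply_gradients(zip(grads, tvars))
      return loss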
+ ########################################################################## + + # TODO : Add auxiallary losses + total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels) + + tvars = model.trainable_variables + + grads = tape.gradient(total_loss, tvars) + + #################################################################### + # Do not use mixed precision for now + # # Scales back gradient when LossScaleOptimizer is used. + + optimizer.apply_gradients(list(zip(grads, tvars))) + + # # Multiply for logging. + # # Since we expect the gradient replica sum to happen in the optimizer, + # # the loss is scaled with global num_boxes and weights. + # # To have it more interpretable/comparable we scale it back when logging. + num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync + total_loss *= num_replicas_in_sync + cls_loss *= num_replicas_in_sync + focal_loss *= num_replicas_in_sync + dice_loss *= num_replicas_in_sync + ##################################################################### + # # Trainer class handles loss metric for you. + logs = {self.loss: total_loss} + + all_losses = { + 'cls_loss': cls_loss, + 'focal_loss': focal_loss, + 'dice_loss': dice_loss, + } + + + # # Metric results will be added to logs for you. + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + return logs + + def validation_step(self, inputs, model, optimizer, metrics=None): + features, labels = inputs + outputs = model(features, training=False) + + loss = 0.0 + cls_loss = 0.0 + focal_loss = 0.0 + dice_loss = 0.0 + + total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels) + num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync - loss *= num_replicas_in_sync + total_loss *= num_replicas_in_sync cls_loss *= num_replicas_in_sync - box_loss *= num_replicas_in_sync - giou_loss *= num_replicas_in_sync - - # Trainer class handles loss metric for you. - logs = {self.loss: loss} + focal_loss *= num_replicas_in_sync + dice_loss *= num_replicas_in_sync + + ##################################################################### + # # Trainer class handles loss metric for you. + logs = {self.loss: total_loss} + + outputs = {"pred_logits": output["class_prob_predictions"], "pred_masks": output["mask_prob_predictions"]} + panoptic_seg, segments_info = PanopticInference(output["pred_logits"], output["pred_masks"], features.shape, self._task_config.model.num_classes) + + logs.update({'panoptic_seg': panoptic_seg, 'segments_info': segments_info}) all_losses = { - 'cls_loss': cls_loss, - 'box_loss': box_loss, - 'giou_loss': giou_loss, - } - # Metric results will be added to logs for you. + 'cls_loss': cls_loss, + 'focal_loss': focal_loss, + 'dice_loss': dice_loss, + } + + # # Metric results will be added to logs for you. 
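Editor's note: the validation_step above reuses the training losses for logging and then post-processes the raw predictions into a panoptic segmentation. A condensed sketch of that flow is below; `postprocess_fn` is a hypothetical stand-in for PanopticInference (whose exact signature is not shown in this patch), and note that the hunk above builds the prediction dict from `output` while the model call assigns to `outputs`.

    import tensorflow as tf

    def validation_logs(model, features, labels, loss_key,
                        build_losses, postprocess_fn, num_classes):
      """Eval flow: forward pass, losses for logging, panoptic post-processing."""
      outputs = model(features, training=False)
      total_loss, cls_loss, focal_loss, dice_loss = build_losses(
          output=outputs, labels=labels)
      num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
      logs = {loss_key: total_loss * num_replicas,
              'cls_loss': cls_loss * num_replicas,
              'focal_loss': focal_loss * num_replicas,
              'dice_loss': dice_loss * num_replicas}
      # Hypothetical call shape, mirroring how PanopticInference is used above.
      panoptic_seg, segments_info = postprocess_fn(
          outputs['class_prob_predictions'], outputs['mask_prob_predictions'],
          features.shape, num_classes)
      logs.update({'panoptic_seg': panoptic_seg, 'segments_info': segments_info})
      return logs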
if metrics: - for m in metrics: - m.update_state(all_losses[m.name]) + for m in metrics: + m.update_state(all_losses[m.name]) return logs - - def validation_step(self, inputs, model, optimizer, metrics=None): - raise NotImplementedError diff --git a/models/official/projects/maskformer/testing.ipynb b/models/official/projects/maskformer/testing.ipynb new file mode 100644 index 00000000..956cbd9b --- /dev/null +++ b/models/official/projects/maskformer/testing.ipynb @@ -0,0 +1,32 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/models/official/projects/maskformer/tpu_test.py b/models/official/projects/maskformer/tpu_test.py new file mode 100644 index 00000000..687d857e --- /dev/null +++ b/models/official/projects/maskformer/tpu_test.py @@ -0,0 +1,8 @@ +import tensorflow as tf + +if __name__ == "__main__": + cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu="tf-train-1", project="red-atlas-305317", zone="us-central1-a") + tf.config.experimental_connect_to_cluster(cluster_resolver) + tf.tpu.experimental.initialize_tpu_system(cluster_resolver) + strategy = tf.distribute.TPUStrategy(cluster_resolver) \ No newline at end of file diff --git a/models/official/projects/maskformer/train.py b/models/official/projects/maskformer/train.py index df0f4779..c1f04537 100644 --- a/models/official/projects/maskformer/train.py +++ b/models/official/projects/maskformer/train.py @@ -12,8 +12,16 @@ from official.modeling import performance from official.projects.maskformer.configs import maskformer from official.projects.maskformer.tasks import panoptic_maskformer +import tensorflow as tf +from cloud_tpu_client import Client +import os +FLAGS = flags.FLAGS def main(_): + # This works only for TPU v3 version + c = Client(os.environ['TPU_NAME'], zone=os.environ['TPU_ZONE'], project=os.environ['TPU_PROJECT']) + c.configure_tpu_version(os.environ["TPU_SOFTWARE"], restart_type='ifNeeded') + c.wait_for_healthy() gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) params = train_utils.parse_configuration(FLAGS) model_dir = FLAGS.model_dir @@ -26,16 +34,31 @@ def main(_): # can have significant impact on model speeds by utilizing float16 in case of # GPUs, and bfloat16 in the case of TPUs. 
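Editor's note: tpu_test.py above hard-codes the TPU name, project and zone, while train.py and train_r50.sh pass them through TPU_NAME / TPU_ZONE / TPU_PROJECT environment variables. A small sketch that builds the same TPUStrategy from those variables (a convenience wrapper, not part of the patch):

    import os
    import tensorflow as tf

    def connect_to_tpu():
      """Builds a TPUStrategy from the env vars exported in train_r50.sh."""
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
          tpu=os.environ['TPU_NAME'],
          zone=os.environ.get('TPU_ZONE'),
          project=os.environ.get('TPU_PROJECT'))
      tf.config.experimental_connect_to_cluster(resolver)
      tf.tpu.experimental.initialize_tpu_system(resolver)
      return tf.distribute.TPUStrategy(resolver)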
loss_scale takes effect only when # dtype is float16 + + # resolver = tf.distribute.cluster_resolver.TPUClusterResolver() + + # Uncomment to test on TPU if params.runtime.mixed_precision_dtype: performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) distribution_strategy = distribute_utils.get_distribution_strategy( - distribution_strategy=params.runtime.distribution_strategy, + distribution_strategy="tpu", all_reduce_alg=params.runtime.all_reduce_alg, num_gpus=params.runtime.num_gpus, tpu_address=params.runtime.tpu) + + # Comment if running on TPU + # if params.runtime.mixed_precision_dtype: + # performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + # distribution_strategy = distribute_utils.get_distribution_strategy( + # distribution_strategy=params.runtime.distribution_strategy, + # all_reduce_alg=params.runtime.all_reduce_alg, + # num_gpus=1) + + # Below code is independent of compute platform with distribution_strategy.scope(): task = task_factory.get_task(params.task, logging_dir=model_dir) - + + train_lib.run_experiment( distribution_strategy=distribution_strategy, task=task, diff --git a/models/official/projects/maskformer/train_r50.sh b/models/official/projects/maskformer/train_r50.sh index 9c57ca22..5315f7a7 100644 --- a/models/official/projects/maskformer/train_r50.sh +++ b/models/official/projects/maskformer/train_r50.sh @@ -1,17 +1,24 @@ -$ export MODEL_DIR="gs://" -$ export TPU_NAME="" -$ export ANNOTATION_FILE="gs://" -$ export TRAIN_DATA="gs://" -$ export EVAL_DATA="gs://" -$ export OVERRIDES="task.validation_data.input_path=${EVAL_DATA},\ -task.train_data.input_path=${TRAIN_DATA},\ -task.annotation_file=${ANNOTATION_FILE},\ -runtime.distribution_strategy=tpu" - - -$ python3 train.py \ - --experiment panoptic_r50_coco \ - --config_file configs/experiments/panoptic_coco_r50.yaml \ +#!/bin/bash +fusermount -u ~/datasets +fusermount -u ~/models +gcsfuse --implicit-dirs cam2-datasets ~/datasets +gcsfuse cam2-models ~/models +export PYTHONPATH=$PYTHONPATH:~/tf-maskformer/models +export MODEL_DIR="gs://cam2-models/maskformer" +export DATA_PTH="gs://cam2-datasets/coco_panoptic" +export TPU_NAME="tf-debug-eu-1" +export TPU_SOFTWARE="2.11.0" +export TPU_PROJECT="red-atlas-305317" +export TPU_ZONE="europe-west4-a" +# export OVERRIDES="task.validation_data.input_path=${DATA_PTH},\ +# task.train_data.input_path=${DATA_PTH},\ +# runtime.distribution_strategy=tpu" +export OVERRIDES="runtime.distribution_strategy=tpu,\ +runtime.mixed_precision_dtype=bfloat16,\ +trainer.train_steps=554400,\ +trainer.optimizer_config.learning_rate.stepwise.boundaries=[369600]" +python3 models/official/projects/maskformer/train.py \ + --experiment maskformer_coco_panoptic \ --mode train \ --model_dir $MODEL_DIR \ --tpu $TPU_NAME \ diff --git a/models/official/vision/__pycache__/__init__.cpython-38.pyc b/models/official/vision/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..1246d61a Binary files /dev/null and b/models/official/vision/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/vision/__pycache__/__init__.cpython-39.pyc b/models/official/vision/__pycache__/__init__.cpython-39.pyc index 2eceb02d..57c5135a 100644 Binary files a/models/official/vision/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/__init__.cpython-38.pyc b/models/official/vision/configs/__pycache__/__init__.cpython-38.pyc new file mode 
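Editor's note: the train.py hunk above hard-codes distribution_strategy="tpu" and keeps the GPU variant commented out. A sketch that keeps the value configurable, reusing the same distribute_utils.get_distribution_strategy call shown in the diff; the STRATEGY_OVERRIDE environment variable and the import path are assumptions for illustration.

    import os
    from official.common import distribute_utils

    def build_strategy(params):
      """Chooses the distribution strategy from the experiment config,
      with an optional env-var override instead of hard-coding 'tpu'."""
      strategy_name = os.environ.get('STRATEGY_OVERRIDE',
                                     params.runtime.distribution_strategy)
      return distribute_utils.get_distribution_strategy(
          distribution_strategy=strategy_name,
          all_reduce_alg=params.runtime.all_reduce_alg,
          num_gpus=params.runtime.num_gpus,
          tpu_address=params.runtime.tpu)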
100644 index 00000000..4da6602c Binary files /dev/null and b/models/official/vision/configs/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc b/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc index 60fff740..ad1a4c84 100644 Binary files a/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones.cpython-38.pyc b/models/official/vision/configs/__pycache__/backbones.cpython-38.pyc new file mode 100644 index 00000000..55e2ac9d Binary files /dev/null and b/models/official/vision/configs/__pycache__/backbones.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc b/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc index 290e4b3d..09c16202 100644 Binary files a/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc and b/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones_3d.cpython-38.pyc b/models/official/vision/configs/__pycache__/backbones_3d.cpython-38.pyc new file mode 100644 index 00000000..99b4a6dd Binary files /dev/null and b/models/official/vision/configs/__pycache__/backbones_3d.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc b/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc index 16738c50..0c39e9d9 100644 Binary files a/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc and b/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/common.cpython-38.pyc b/models/official/vision/configs/__pycache__/common.cpython-38.pyc new file mode 100644 index 00000000..95ceb60b Binary files /dev/null and b/models/official/vision/configs/__pycache__/common.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/common.cpython-39.pyc b/models/official/vision/configs/__pycache__/common.cpython-39.pyc index 52a219f1..ecaeadae 100644 Binary files a/models/official/vision/configs/__pycache__/common.cpython-39.pyc and b/models/official/vision/configs/__pycache__/common.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc b/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc index e3ff8906..2c0cffbc 100644 Binary files a/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc and b/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc b/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc index 411e29ef..76bb702e 100644 Binary files a/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc and b/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc b/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc index 625aedec..378f2ce1 100644 Binary files a/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc and b/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc differ diff --git 
a/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc b/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc index cc6083e9..867ba786 100644 Binary files a/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc and b/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc b/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc index 339bf33d..a38479f2 100644 Binary files a/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc and b/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc b/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc index a38e0f5c..e1fbe521 100644 Binary files a/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc and b/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc differ diff --git a/models/official/vision/data/__pycache__/__init__.cpython-39.pyc b/models/official/vision/data/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 00000000..60ae193f Binary files /dev/null and b/models/official/vision/data/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/data/__pycache__/tfrecord_lib.cpython-39.pyc b/models/official/vision/data/__pycache__/tfrecord_lib.cpython-39.pyc new file mode 100644 index 00000000..5a62d13d Binary files /dev/null and b/models/official/vision/data/__pycache__/tfrecord_lib.cpython-39.pyc differ diff --git a/models/official/vision/data/create_coco_tf_record.py b/models/official/vision/data/create_coco_tf_record.py index cfcd679a..334db760 100644 --- a/models/official/vision/data/create_coco_tf_record.py +++ b/models/official/vision/data/create_coco_tf_record.py @@ -138,12 +138,16 @@ def generate_coco_panoptics_masks(segments_info, mask_path, segment_id = segment['id'] category_id = segment['category_id'] is_crowd = segment['iscrowd'] + if FLAGS.panoptic_skip_crowd and is_crowd: continue + if is_category_thing[category_id]: + # This for thing encoded_category_id = _THING_CLASS_ID instance_id = idx + 1 else: + # This is for stuff (for stuff no instance id) encoded_category_id = category_id - _STUFF_CLASSES_OFFSET instance_id = _VOID_INSTANCE_ID @@ -151,7 +155,7 @@ def generate_coco_panoptics_masks(segments_info, mask_path, semantic_segmentation_mask[segment_mask] = encoded_category_id if include_panoptic_masks: - category_mask[segment_mask] = category_id + category_mask[segment_mask] = category_id instance_mask[segment_mask] = instance_id outputs = { diff --git a/models/official/vision/data/tfrecord_lib.py b/models/official/vision/data/tfrecord_lib.py index 4eeef2d8..f4090c04 100644 --- a/models/official/vision/data/tfrecord_lib.py +++ b/models/official/vision/data/tfrecord_lib.py @@ -24,7 +24,7 @@ import tensorflow as tf import multiprocessing as mp - +from tqdm import tqdm LOG_EVERY = 100 @@ -162,8 +162,8 @@ def write_tf_record_dataset(output_path, annotation_iterator, else: tf_example_iterator = map(process_func, annotation_iterator) - for idx, (tf_example, num_annotations_skipped) in enumerate( - tf_example_iterator): + for idx, (tf_example, num_annotations_skipped) in tqdm(enumerate( + tf_example_iterator)): if idx % LOG_EVERY == 0: logging.info('On image %d', idx) diff --git 
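Editor's note: the create_coco_tf_record.py hunk above annotates the thing/stuff branch that fills the semantic, category and instance masks. A condensed, standalone sketch of that loop follows; it assumes `panoptic_png` is already decoded to a 2-D segment-id map and `is_category_thing` maps category ids to booleans, and the constant defaults are illustrative (the real _THING_CLASS_ID, _STUFF_CLASSES_OFFSET and _VOID_INSTANCE_ID live in create_coco_tf_record.py).

    import numpy as np

    def encode_panoptic_masks(segments_info, panoptic_png, is_category_thing,
                              thing_class_id=1, stuff_offset=90,
                              void_instance_id=0):
      """Builds semantic / category / instance masks from COCO panoptic segments."""
      semantic_mask = np.zeros_like(panoptic_png, dtype=np.uint8)
      category_mask = np.zeros_like(panoptic_png, dtype=np.uint16)
      instance_mask = np.full_like(panoptic_png, void_instance_id, dtype=np.uint16)
      for idx, segment in enumerate(segments_info):
        segment_pixels = panoptic_png == segment['id']
        if is_category_thing[segment['category_id']]:
          # "Thing": collapse the class id, keep a per-segment instance id.
          semantic_mask[segment_pixels] = thing_class_id
          instance_mask[segment_pixels] = idx + 1
        else:
          # "Stuff": keep the (offset) class id; instance id stays void.
          semantic_mask[segment_pixels] = segment['category_id'] - stuff_offset
        category_mask[segment_pixels] = segment['category_id']
      return semantic_mask, category_mask, instance_mask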
a/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc index c3500ec6..9909857c 100644 Binary files a/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc index ca5fc162..226533cd 100644 Binary files a/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc index 36cc59fb..dd42bb91 100644 Binary files a/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc index acf695f3..c7241086 100644 Binary files a/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc index 8bff6a80..edd4a5bd 100644 Binary files a/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc index 89b4c945..96b81966 100644 Binary files a/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc index 13b01a4c..13779528 100644 Binary files a/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc index b071b213..8a64cc4d 100644 Binary files a/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc index 665ac509..eb593f60 100644 Binary files a/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc 
b/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc index 47fbf088..ad9fd8ae 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc index 3d6cad8d..303eb8f7 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc index a32724a8..84fc2187 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc index 4d7dc106..e20c9b71 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc index 91b56792..6b35c2ff 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc index 9ae57ad8..eeb00ef2 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc index b8d29f45..922109c7 100644 Binary files a/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc index 6b4f4645..f03e5448 100644 Binary files a/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/input_reader.py b/models/official/vision/dataloaders/input_reader.py index 38cae7f5..af13ba0d 100644 --- a/models/official/vision/dataloaders/input_reader.py +++ b/models/official/vision/dataloaders/input_reader.py @@ -190,7 +190,7 @@ def read( labeled_batch_size, pl_batch_size = calculate_batch_sizes( self._global_batch_size, self._pseudo_label_data_ratio, 
self._pseudo_label_batch_size) - + if not labeled_batch_size and pl_batch_size: raise ValueError( 'Invalid batch_size: {} and pseudo_label_data_ratio: {}, ' diff --git a/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc index 989be085..4772afe6 100644 Binary files a/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc index 924de2bf..cd52b08a 100644 Binary files a/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc index c55b7920..f6935164 100644 Binary files a/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc index 9df5ec50..3ee33b31 100644 Binary files a/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc index db961088..8456c478 100644 Binary files a/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc index 6b52d4e0..d4e3e096 100644 Binary files a/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc b/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc index 53300321..53a80efd 100644 Binary files a/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc b/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc index 4e83157b..e8d948f9 100644 Binary files a/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc and b/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc b/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc index 68041bfe..f87f15a1 100644 Binary files a/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc and b/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc 
b/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc index 439d779f..ca7c3836 100644 Binary files a/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc and b/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc b/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc index 47fa0bea..ae38a797 100644 Binary files a/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc and b/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc index 9dde48e0..b71eceeb 100644 Binary files a/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc index f4168ed0..fa2ee357 100644 Binary files a/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc b/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc index 6062f17a..58dd5240 100644 Binary files a/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc b/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc index 650f2788..22537f12 100644 Binary files a/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc index 6701eb93..845c463a 100644 Binary files a/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc index 1b2ed42b..606fbce1 100644 Binary files a/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc index f284ca5a..f30dd6d2 100644 Binary files a/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc index aa590bea..71de84c7 100644 Binary files 
a/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc index b360c804..b79ca790 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc index 4c1e7374..5de4f0d1 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc index 50017dfb..4f711e0e 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc index 988d1413..d714143d 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc index 1351c469..3832049c 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc index 612d10fe..8aa81a07 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc index 38a7fdd2..c8695b19 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc index 6c00f0d0..152c8a91 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc index 633e7450..fb7b3de5 100644 Binary files 
a/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc index c69a61cd..0fbecda8 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc index 285c9424..89c9ea4a 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc index 6fe2e4cc..db368c9b 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc index 26524178..783f5f2f 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc index 74fc63a2..1e5dcb36 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc index b496cf76..85cd96a8 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc index 59969ae8..5751a8e6 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc index e8521fc4..3bf9cd83 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc index 47fdbf1a..1157544d 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc and 
b/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc index 1eb8bd44..397863d7 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc index ae51ea84..71e746fa 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc index d2b3e6b6..2280714a 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc index 7dc95b30..511f2f6b 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc index c6463ad9..f3af43b2 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc index aea33ee2..ebb879c8 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc index 67ca134f..c229b763 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc index 216c232b..12a2d87c 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc index c990fdf2..70361410 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc and 
b/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc index 9e558f77..3f0be642 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc index b900218b..5ce293ef 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc index b7fa4e0c..4839befa 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc index 3fcce21c..16273210 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc index c4f341b9..87e650cf 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc index 84c2da2a..16171547 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc index 92e41c57..6af07b87 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc differ diff --git a/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc index 09392f15..d23c24cb 100644 Binary files a/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc b/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc index 29429709..47e09880 100644 Binary files a/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc differ diff --git 
a/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc b/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc index 416c6774..43764d57 100644 Binary files a/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc and b/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc b/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc index eac01ad4..31b590e0 100644 Binary files a/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc and b/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/augment.cpython-39.pyc b/models/official/vision/ops/__pycache__/augment.cpython-39.pyc index a89d45c7..96d671f4 100644 Binary files a/models/official/vision/ops/__pycache__/augment.cpython-39.pyc and b/models/official/vision/ops/__pycache__/augment.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc b/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc index e37f2ea4..49ae0429 100644 Binary files a/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc and b/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc index 9b22a986..8deca1b5 100644 Binary files a/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc b/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc index 5fb687e3..f69ba4b8 100644 Binary files a/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc and b/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc index 773a39c0..4ca8d9ef 100644 Binary files a/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/nms.cpython-39.pyc b/models/official/vision/ops/__pycache__/nms.cpython-39.pyc index c34eb8b3..1464269c 100644 Binary files a/models/official/vision/ops/__pycache__/nms.cpython-39.pyc and b/models/official/vision/ops/__pycache__/nms.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc index 1af06b27..6cfe4477 100644 Binary files a/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc b/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc index 4ab2e180..e91acd80 100644 Binary files a/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc and b/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc index 65727cfe..9d5960b2 100644 Binary files 
a/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc index b3484bea..4631b4f6 100644 Binary files a/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc b/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc index 14f08cce..341929e2 100644 Binary files a/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc and b/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc differ diff --git a/models/official/vision/ops/preprocess_ops.py b/models/official/vision/ops/preprocess_ops.py index 890e3f70..dba6d61b 100644 --- a/models/official/vision/ops/preprocess_ops.py +++ b/models/official/vision/ops/preprocess_ops.py @@ -36,104 +36,81 @@ def clip_or_pad_to_fixed_size(input_tensor, size, constant_values=0): - """Pads data to a fixed length at the first dimension. - - Args: - input_tensor: `Tensor` with any dimension. - size: `int` number for the first dimension of output Tensor. - constant_values: `int` value assigned to the paddings. - - Returns: - `Tensor` with the first dimension padded to `size`. - """ - input_shape = input_tensor.get_shape().as_list() - padding_shape = [] - - # Computes the padding length on the first dimension, clip input tensor if it - # is longer than `size`. - input_length = tf.shape(input_tensor)[0] - input_length = tf.clip_by_value(input_length, 0, size) - input_tensor = input_tensor[:input_length] - - padding_length = tf.maximum(0, size - input_length) - padding_shape.append(padding_length) - - # Copies shapes of the rest of input shape dimensions. - for i in range(1, len(input_shape)): - padding_shape.append(tf.shape(input_tensor)[i]) - - # Pads input tensor to the fixed first dimension. - paddings = tf.cast(constant_values * tf.ones(padding_shape), - input_tensor.dtype) - padded_tensor = tf.concat([input_tensor, paddings], axis=0) - output_shape = input_shape - output_shape[0] = size - padded_tensor.set_shape(output_shape) - return padded_tensor - - -def normalize_image(image: tf.Tensor, - offset: Sequence[float] = MEAN_NORM, - scale: Sequence[float] = STDDEV_NORM): - """Normalizes the image to zero mean and unit variance.""" - with tf.name_scope('normalize_image'): - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - return normalize_scaled_float_image(image, offset, scale) - - -def normalize_scaled_float_image(image: tf.Tensor, - offset: Sequence[float] = MEAN_NORM, - scale: Sequence[float] = STDDEV_NORM): - """Normalizes a scaled float image to zero mean and unit variance. - - It assumes the input image is float dtype with values in [0, 1). - - Args: - image: A tf.Tensor in float32 dtype with values in range [0, 1). - offset: A tuple of mean values to be subtracted from the image. - scale: A tuple of normalization factors. - - Returns: - A normalized image tensor. 
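Note: `clip_or_pad_to_fixed_size` clips the leading dimension to at most `size` and then pads it back up to exactly `size` with `constant_values`. A minimal sketch of the same contract, using a hypothetical helper name (TF 2.x assumed):

import tensorflow as tf

def fixed_first_dim_sketch(t, size, constant_values=0):
  # Clip the first dimension to at most `size`.
  t = t[:tf.minimum(tf.shape(t)[0], size)]
  # Pad the first dimension back up to exactly `size`.
  pad = tf.maximum(0, size - tf.shape(t)[0])
  paddings = [[0, pad]] + [[0, 0]] * (t.shape.rank - 1)
  return tf.pad(t, paddings, constant_values=constant_values)

boxes = tf.random.uniform([3, 4])
print(fixed_first_dim_sketch(boxes, 10).shape)  # (10, 4): padded with zeros
print(fixed_first_dim_sketch(boxes, 2).shape)   # (2, 4): clipped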
- """ - offset = tf.constant(offset) - offset = tf.expand_dims(offset, axis=0) - offset = tf.expand_dims(offset, axis=0) - image -= offset - - scale = tf.constant(scale) - scale = tf.expand_dims(scale, axis=0) - scale = tf.expand_dims(scale, axis=0) - image /= scale - return image + """Pads data to a fixed length at the first dimension. + Args: + input_tensor: `Tensor` with any dimension. + size: `int` number for the first dimension of output Tensor. + constant_values: `int` value assigned to the paddings. + Returns: + `Tensor` with the first dimension padded to `size`. + """ + input_shape = input_tensor.get_shape().as_list() + padding_shape = [] + + # Computes the padding length on the first dimension, clip input tensor if it + # is longer than `size`. + input_length = tf.shape(input_tensor)[0] + input_length = tf.clip_by_value(input_length, 0, size) + input_tensor = input_tensor[:input_length] + + padding_length = tf.maximum(0, size - input_length) + padding_shape.append(padding_length) + + # Copies shapes of the rest of input shape dimensions. + for i in range(1, len(input_shape)): + padding_shape.append(tf.shape(input_tensor)[i]) + + # Pads input tensor to the fixed first dimension. + paddings = tf.cast(constant_values * tf.ones(padding_shape), + input_tensor.dtype) + padded_tensor = tf.concat([input_tensor, paddings], axis=0) + output_shape = input_shape + output_shape[0] = size + padded_tensor.set_shape(output_shape) + return padded_tensor + + +def normalize_image(image, + offset=(0.485, 0.456, 0.406), + scale=(0.229, 0.224, 0.225)): + """Normalizes the image to zero mean and unit variance.""" + with tf.name_scope('normalize_image'): + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + offset = tf.constant(offset) + offset = tf.expand_dims(offset, axis=0) + offset = tf.expand_dims(offset, axis=0) + image -= offset + + scale = tf.constant(scale) + scale = tf.expand_dims(scale, axis=0) + scale = tf.expand_dims(scale, axis=0) + image /= scale + return image def compute_padded_size(desired_size, stride): - """Compute the padded size given the desired size and the stride. - - The padded size will be the smallest rectangle, such that each dimension is - the smallest multiple of the stride which is larger than the desired - dimension. For example, if desired_size = (100, 200) and stride = 32, - the output padded_size = (128, 224). - - Args: - desired_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the target output image size. - stride: an integer, the stride of the backbone network. - - Returns: - padded_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the padded output image size. - """ - if isinstance(desired_size, list) or isinstance(desired_size, tuple): - padded_size = [int(math.ceil(d * 1.0 / stride) * stride) - for d in desired_size] - else: - padded_size = tf.cast( - tf.math.ceil( - tf.cast(desired_size, dtype=tf.float32) / stride) * stride, - tf.int32) - return padded_size + """Compute the padded size given the desired size and the stride. + The padded size will be the smallest rectangle, such that each dimension is + the smallest multiple of the stride which is larger than the desired + dimension. For example, if desired_size = (100, 200) and stride = 32, + the output padded_size = (128, 224). + Args: + desired_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the target output image size. + stride: an integer, the stride of the backbone network. 
+ Returns: + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. + """ + if isinstance(desired_size, list) or isinstance(desired_size, tuple): + padded_size = [int(math.ceil(d * 1.0 / stride) * stride) + for d in desired_size] + else: + padded_size = tf.cast( + tf.math.ceil( + tf.cast(desired_size, dtype=tf.float32) / stride) * stride, + tf.int32) + return padded_size def resize_and_crop_image(image, @@ -143,94 +120,88 @@ def resize_and_crop_image(image, aug_scale_max=1.0, seed=1, method=tf.image.ResizeMethod.BILINEAR): - """Resizes the input image to output size (RetinaNet style). - - Resize and pad images given the desired output size of the image and - stride size. - - Here are the preprocessing steps. - 1. For a given image, keep its aspect ratio and rescale the image to make it - the largest rectangle to be bounded by the rectangle specified by the - `desired_size`. - 2. Pad the rescaled image to the padded_size. - - Args: - image: a `Tensor` of shape [height, width, 3] representing an image. - desired_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the desired actual output image size. - padded_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the padded output image size. Padding will be applied - after scaling the image to the desired_size. - aug_scale_min: a `float` with range between [0, 1.0] representing minimum - random scale applied to desired_size for training scale jittering. - aug_scale_max: a `float` with range between [1.0, inf] representing maximum - random scale applied to desired_size for training scale jittering. - seed: seed for random scale jittering. - method: function to resize input image to scaled image. - - Returns: - output_image: `Tensor` of shape [height, width, 3] where [height, width] - equals to `output_size`. - image_info: a 2D `Tensor` that encodes the information of the image and the - applied preprocessing. It is in the format of - [[original_height, original_width], [desired_height, desired_width], - [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, - desired_width] is the actual scaled image size, and [y_scale, x_scale] is - the scaling factor, which is the ratio of - scaled dimension / original dimension. - """ - with tf.name_scope('resize_and_crop_image'): - image_size = tf.cast(tf.shape(image)[0:2], tf.float32) - - random_jittering = ( - isinstance(aug_scale_min, tf.Tensor) - or isinstance(aug_scale_max, tf.Tensor) - or not math.isclose(aug_scale_min, 1.0) - or not math.isclose(aug_scale_max, 1.0) - ) - - if random_jittering: - random_scale = tf.random.uniform( - [], aug_scale_min, aug_scale_max, seed=seed) - scaled_size = tf.round(random_scale * tf.cast(desired_size, tf.float32)) - else: - scaled_size = tf.cast(desired_size, tf.float32) - - scale = tf.minimum( - scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) - scaled_size = tf.round(image_size * scale) - - # Computes 2D image_scale. - image_scale = scaled_size / image_size - - # Selects non-zero random offset (x, y) if scaled image is larger than - # desired_size. 
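Note: `compute_padded_size` rounds each dimension up to the next multiple of the backbone stride, e.g. (100, 200) with stride 32 becomes (128, 224). A quick plain-Python check (sketch):

import math

def padded_size_sketch(desired_size, stride):
  # Round each dimension up to the nearest multiple of `stride`.
  return [int(math.ceil(d / stride) * stride) for d in desired_size]

print(padded_size_sketch((100, 200), 32))  # [128, 224]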
- if random_jittering: - max_offset = scaled_size - tf.cast(desired_size, tf.float32) - max_offset = tf.where( - tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) - offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed) - offset = tf.cast(offset, tf.int32) - else: - offset = tf.zeros((2,), tf.int32) - - scaled_image = tf.image.resize( - image, tf.cast(scaled_size, tf.int32), method=method) - - if random_jittering: - scaled_image = scaled_image[ - offset[0]:offset[0] + desired_size[0], - offset[1]:offset[1] + desired_size[1], :] - - output_image = tf.image.pad_to_bounding_box( - scaled_image, 0, 0, padded_size[0], padded_size[1]) - - image_info = tf.stack([ - image_size, - tf.cast(desired_size, dtype=tf.float32), - image_scale, - tf.cast(offset, tf.float32)]) - return output_image, image_info + """Resizes the input image to output size (RetinaNet style). + Resize and pad images given the desired output size of the image and + stride size. + Here are the preprocessing steps. + 1. For a given image, keep its aspect ratio and rescale the image to make it + the largest rectangle to be bounded by the rectangle specified by the + `desired_size`. + 2. Pad the rescaled image to the padded_size. + Args: + image: a `Tensor` of shape [height, width, 3] representing an image. + desired_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the desired actual output image size. + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. Padding will be applied + after scaling the image to the desired_size. + aug_scale_min: a `float` with range between [0, 1.0] representing minimum + random scale applied to desired_size for training scale jittering. + aug_scale_max: a `float` with range between [1.0, inf] representing maximum + random scale applied to desired_size for training scale jittering. + seed: seed for random scale jittering. + method: function to resize input image to scaled image. + Returns: + output_image: `Tensor` of shape [height, width, 3] where [height, width] + equals to `output_size`. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. It is in the format of + [[original_height, original_width], [desired_height, desired_width], + [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, + desired_width] is the actual scaled image size, and [y_scale, x_scale] is + the scaling factor, which is the ratio of + scaled dimension / original dimension. + """ + with tf.name_scope('resize_and_crop_image'): + image_size = tf.cast(tf.shape(image)[0:2], tf.float32) + desired_size = tf.cast(desired_size, tf.float32) + random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0) + + if random_jittering: + random_scale = tf.random.uniform( + [], aug_scale_min, aug_scale_max, seed=seed) + scaled_size = tf.round(random_scale * desired_size) + else: + scaled_size = desired_size + + scale = tf.minimum( + scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) + scaled_size = tf.round(image_size * scale) + + # Computes 2D image_scale. + image_scale = scaled_size / image_size + + # Selects non-zero random offset (x, y) if scaled image is larger than + # desired_size. 
+ if random_jittering: + max_offset = scaled_size - desired_size + max_offset = tf.where( + tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) + offset = max_offset * tf.random.uniform([2, ], 0, 1, seed=seed) + offset = tf.cast(offset, tf.int32) + else: + offset = tf.zeros((2,), tf.int32) + + scaled_image = tf.image.resize( + image, tf.cast(scaled_size, tf.int32), method=method) + + if random_jittering: + desired_size = tf.cast(desired_size, tf.int32) + scaled_image = scaled_image[ + offset[0]:offset[0] + desired_size[0], + offset[1]:offset[1] + desired_size[1], :] + desired_size = tf.cast(desired_size, tf.float32) + + output_image = tf.image.pad_to_bounding_box( + scaled_image, 0, 0, padded_size[0], padded_size[1]) + + desired_size = tf.cast(desired_size, tf.float32) + image_info = tf.stack([ + image_size, + desired_size, + image_scale, + tf.cast(offset, tf.float32)]) + return output_image, image_info def resize_and_crop_image_v2(image, @@ -241,255 +212,228 @@ def resize_and_crop_image_v2(image, aug_scale_max=1.0, seed=1, method=tf.image.ResizeMethod.BILINEAR): - """Resizes the input image to output size (Faster R-CNN style). - - Resize and pad images given the specified short / long side length and the - stride size. - - Here are the preprocessing steps. - 1. For a given image, keep its aspect ratio and first try to rescale the short - side of the original image to `short_side`. - 2. If the scaled image after 1 has a long side that exceeds `long_side`, keep - the aspect ratio and rescale the long side of the image to `long_side`. - 3. (Optional) Apply random jittering according to `aug_scale_min` and - `aug_scale_max`. By default this step is skipped. - 4. Pad the rescaled image to the padded_size. - - Args: - image: a `Tensor` of shape [height, width, 3] representing an image. - short_side: a scalar `Tensor` or `int` representing the desired short side - to be rescaled to. - long_side: a scalar `Tensor` or `int` representing the desired long side to - be rescaled to. - padded_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the padded output image size. - aug_scale_min: a `float` with range between [0, 1.0] representing minimum - random scale applied for training scale jittering. - aug_scale_max: a `float` with range between [1.0, inf] representing maximum - random scale applied for training scale jittering. - seed: seed for random scale jittering. - method: function to resize input image to scaled image. - - Returns: - output_image: `Tensor` of shape [height, width, 3] where [height, width] - equals to `output_size`. - image_info: a 2D `Tensor` that encodes the information of the image and the - applied preprocessing. It is in the format of - [[original_height, original_width], [desired_height, desired_width], - [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, - desired_width] is the actual scaled image size, and [y_scale, x_scale] is - the scaling factor, which is the ratio of - scaled dimension / original dimension. 
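Note: without jittering, `resize_and_crop_image` scales by min(desired_h / h, desired_w / w) so the resized image fits inside `desired_size` with its aspect ratio preserved, then pads to `padded_size`; the per-axis scale factors end up in the third row of `image_info`. A plain-Python sketch of the scale bookkeeping (example numbers only):

def retina_scale_sketch(image_hw, desired_hw):
  h, w = image_hw
  dh, dw = desired_hw
  scale = min(dh / h, dw / w)            # keep aspect ratio, fit inside target
  scaled_hw = (round(h * scale), round(w * scale))
  image_scale = (scaled_hw[0] / h, scaled_hw[1] / w)
  return scaled_hw, image_scale

print(retina_scale_sketch((400, 600), (640, 640)))
# ((427, 640), (1.0675, 1.0666...))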
- """ - with tf.name_scope('resize_and_crop_image_v2'): - image_size = tf.cast(tf.shape(image)[0:2], tf.float32) - - scale_using_short_side = ( - short_side / tf.math.minimum(image_size[0], image_size[1])) - scale_using_long_side = ( - long_side / tf.math.maximum(image_size[0], image_size[1])) - - scaled_size = tf.math.round(image_size * scale_using_short_side) - scaled_size = tf.where( - tf.math.greater( - tf.math.maximum(scaled_size[0], scaled_size[1]), long_side), - tf.math.round(image_size * scale_using_long_side), - scaled_size) - desired_size = scaled_size - - random_jittering = ( - isinstance(aug_scale_min, tf.Tensor) - or isinstance(aug_scale_max, tf.Tensor) - or not math.isclose(aug_scale_min, 1.0) - or not math.isclose(aug_scale_max, 1.0) - ) - - if random_jittering: - random_scale = tf.random.uniform( - [], aug_scale_min, aug_scale_max, seed=seed) - scaled_size = tf.math.round(random_scale * scaled_size) - - # Computes 2D image_scale. - image_scale = scaled_size / image_size - - # Selects non-zero random offset (x, y) if scaled image is larger than - # desired_size. - if random_jittering: - max_offset = scaled_size - desired_size - max_offset = tf.where( - tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) - offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed) - offset = tf.cast(offset, tf.int32) - else: - offset = tf.zeros((2,), tf.int32) - - scaled_image = tf.image.resize( - image, tf.cast(scaled_size, tf.int32), method=method) - - if random_jittering: - scaled_image = scaled_image[ - offset[0]:offset[0] + desired_size[0], - offset[1]:offset[1] + desired_size[1], :] - - output_image = tf.image.pad_to_bounding_box( - scaled_image, 0, 0, padded_size[0], padded_size[1]) - - image_info = tf.stack([ - image_size, - tf.cast(desired_size, dtype=tf.float32), - image_scale, - tf.cast(offset, tf.float32)]) - return output_image, image_info + """Resizes the input image to output size (Faster R-CNN style). + Resize and pad images given the specified short / long side length and the + stride size. + Here are the preprocessing steps. + 1. For a given image, keep its aspect ratio and first try to rescale the short + side of the original image to `short_side`. + 2. If the scaled image after 1 has a long side that exceeds `long_side`, keep + the aspect ratio and rescal the long side of the image to `long_side`. + 2. Pad the rescaled image to the padded_size. + Args: + image: a `Tensor` of shape [height, width, 3] representing an image. + short_side: a scalar `Tensor` or `int` representing the desired short side + to be rescaled to. + long_side: a scalar `Tensor` or `int` representing the desired long side to + be rescaled to. + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. Padding will be applied + after scaling the image to the desired_size. + aug_scale_min: a `float` with range between [0, 1.0] representing minimum + random scale applied to desired_size for training scale jittering. + aug_scale_max: a `float` with range between [1.0, inf] representing maximum + random scale applied to desired_size for training scale jittering. + seed: seed for random scale jittering. + method: function to resize input image to scaled image. + Returns: + output_image: `Tensor` of shape [height, width, 3] where [height, width] + equals to `output_size`. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. 
It is in the format of + [[original_height, original_width], [desired_height, desired_width], + [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, + desired_width] is the actual scaled image size, and [y_scale, x_scale] is + the scaling factor, which is the ratio of + scaled dimension / original dimension. + """ + with tf.name_scope('resize_and_crop_image_v2'): + image_size = tf.cast(tf.shape(image)[0:2], tf.float32) + + scale_using_short_side = ( + short_side / tf.math.minimum(image_size[0], image_size[1])) + scale_using_long_side = ( + long_side / tf.math.maximum(image_size[0], image_size[1])) + + scaled_size = tf.math.round(image_size * scale_using_short_side) + scaled_size = tf.where( + tf.math.greater( + tf.math.maximum(scaled_size[0], scaled_size[1]), long_side), + tf.math.round(image_size * scale_using_long_side), + scaled_size) + desired_size = scaled_size + + random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0) + + if random_jittering: + random_scale = tf.random.uniform( + [], aug_scale_min, aug_scale_max, seed=seed) + scaled_size = tf.math.round(random_scale * scaled_size) + + # Computes 2D image_scale. + image_scale = scaled_size / image_size + + # Selects non-zero random offset (x, y) if scaled image is larger than + # desired_size. + if random_jittering: + max_offset = scaled_size - desired_size + max_offset = tf.where( + tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) + offset = max_offset * tf.random.uniform([2, ], 0, 1, seed=seed) + offset = tf.cast(offset, tf.int32) + else: + offset = tf.zeros((2,), tf.int32) + + scaled_image = tf.image.resize( + image, tf.cast(scaled_size, tf.int32), method=method) + + if random_jittering: + scaled_image = scaled_image[ + offset[0]:offset[0] + desired_size[0], + offset[1]:offset[1] + desired_size[1], :] + + output_image = tf.image.pad_to_bounding_box( + scaled_image, 0, 0, padded_size[0], padded_size[1]) + + image_info = tf.stack([ + image_size, + tf.cast(desired_size, dtype=tf.float32), + image_scale, + tf.cast(offset, tf.float32)]) + return output_image, image_info def resize_image( - image: tf.Tensor, - size: Union[Tuple[int, int], int], - max_size: Optional[int] = None, - method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR): - """Resize image with size and max_size. - - Args: - image: the image to be resized. - size: if list to tuple, resize to it. If scalar, we keep the same - aspect ratio and resize the short side to the value. - max_size: only used when size is a scalar. When the larger side is larger - than max_size after resized with size we used max_size to keep the aspect - ratio instead. - method: the method argument passed to tf.image.resize. - - Returns: - the resized image and image_info to be used for downstream processing. - image_info: a 2D `Tensor` that encodes the information of the image and the - applied preprocessing. It is in the format of - [[original_height, original_width], [resized_height, resized_width], - [y_scale, x_scale], [0, 0]], where [resized_height, resized_width] - is the actual scaled image size, and [y_scale, x_scale] is the - scaling factor, which is the ratio of - scaled dimension / original dimension. 
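Note: in the Faster R-CNN style resize above, the short side is scaled to `short_side` unless that would push the long side past `long_side`, in which case the long side is scaled to `long_side` instead. A plain-Python sketch of that rule (example numbers only):

def frcnn_scaled_size_sketch(image_hw, short_side, long_side):
  h, w = image_hw
  scale_short = short_side / min(h, w)
  scale_long = long_side / max(h, w)
  scaled = (round(h * scale_short), round(w * scale_short))
  if max(scaled) > long_side:
    scaled = (round(h * scale_long), round(w * scale_long))
  return scaled

print(frcnn_scaled_size_sketch((500, 1500), 800, 1333))  # long side caps it: (444, 1333)
print(frcnn_scaled_size_sketch((600, 800), 800, 1333))   # short side rules: (800, 1067)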
- """ - - def get_size_with_aspect_ratio(image_size, size, max_size=None): - h = image_size[0] - w = image_size[1] - if max_size is not None: - min_original_size = tf.cast(tf.math.minimum(w, h), dtype=tf.float32) - max_original_size = tf.cast(tf.math.maximum(w, h), dtype=tf.float32) - if max_original_size / min_original_size * size > max_size: - size = tf.cast( - tf.math.floor(max_size * min_original_size / max_original_size), - dtype=tf.int32) - else: - size = tf.cast(size, tf.int32) - - else: - size = tf.cast(size, tf.int32) - if (w <= h and w == size) or (h <= w and h == size): - return tf.stack([h, w]) - - if w < h: - ow = size - oh = tf.cast( - (tf.cast(size, dtype=tf.float32) * tf.cast(h, dtype=tf.float32) / - tf.cast(w, dtype=tf.float32)), - dtype=tf.int32) - else: - oh = size - ow = tf.cast( - (tf.cast(size, dtype=tf.float32) * tf.cast(w, dtype=tf.float32) / - tf.cast(h, dtype=tf.float32)), - dtype=tf.int32) - - return tf.stack([oh, ow]) - - def get_size(image_size, size, max_size=None): - if isinstance(size, (list, tuple)): - return size[::-1] - else: - return get_size_with_aspect_ratio(image_size, size, max_size) - - orignal_size = tf.shape(image)[0:2] - size = get_size(orignal_size, size, max_size) - rescaled_image = tf.image.resize( - image, tf.cast(size, tf.int32), method=method) - image_scale = size / orignal_size - image_info = tf.stack([ - tf.cast(orignal_size, dtype=tf.float32), - tf.cast(size, dtype=tf.float32), - tf.cast(image_scale, tf.float32), - tf.constant([0.0, 0.0], dtype=tf.float32) - ]) - return rescaled_image, image_info - - -def center_crop_image( - image, center_crop_fraction: float = CENTER_CROP_FRACTION): - """Center crop a square shape slice from the input image. - - It crops a square shape slice from the image. The side of the actual crop - is 224 / 256 = 0.875 of the short side of the original image. References: - [1] Very Deep Convolutional Networks for Large-Scale Image Recognition - https://arxiv.org/abs/1409.1556 - [2] Deep Residual Learning for Image Recognition - https://arxiv.org/abs/1512.03385 - - Args: - image: a Tensor of shape [height, width, 3] representing the input image. - center_crop_fraction: a float of ratio between the side of the cropped image - and the short side of the original image - - Returns: - cropped_image: a Tensor representing the center cropped image. - """ - with tf.name_scope('center_crop_image'): - image_size = tf.cast(tf.shape(image)[:2], dtype=tf.float32) - crop_size = ( - center_crop_fraction * tf.math.minimum(image_size[0], image_size[1])) - crop_offset = tf.cast((image_size - crop_size) / 2.0, dtype=tf.int32) - crop_size = tf.cast(crop_size, dtype=tf.int32) - cropped_image = image[ - crop_offset[0]:crop_offset[0] + crop_size, - crop_offset[1]:crop_offset[1] + crop_size, :] - return cropped_image - - -def center_crop_image_v2( - image_bytes, image_shape, center_crop_fraction: float = CENTER_CROP_FRACTION -): - """Center crop a square shape slice from the input image. - - It crops a square shape slice from the image. The side of the actual crop - is 224 / 256 = 0.875 of the short side of the original image. References: - [1] Very Deep Convolutional Networks for Large-Scale Image Recognition - https://arxiv.org/abs/1409.1556 - [2] Deep Residual Learning for Image Recognition - https://arxiv.org/abs/1512.03385 - - This is a faster version of `center_crop_image` which takes the original - image bytes and image size as the inputs, and partially decode the JPEG - bytes according to the center crop. 
- - Args: - image_bytes: a Tensor of type string representing the raw image bytes. - image_shape: a Tensor specifying the shape of the raw image. - center_crop_fraction: a float of ratio between the side of the cropped image - and the short side of the original image - - Returns: - cropped_image: a Tensor representing the center cropped image. - """ - with tf.name_scope('center_image_crop_v2'): - image_shape = tf.cast(image_shape, tf.float32) - crop_size = center_crop_fraction * tf.math.minimum( - image_shape[0], image_shape[1] - ) - crop_offset = tf.cast((image_shape - crop_size) / 2.0, dtype=tf.int32) - crop_size = tf.cast(crop_size, dtype=tf.int32) - crop_window = tf.stack( - [crop_offset[0], crop_offset[1], crop_size, crop_size]) - cropped_image = tf.image.decode_and_crop_jpeg( - image_bytes, crop_window, channels=3) - return cropped_image + image: tf.Tensor, + size: Union[Tuple[int, int], int], + max_size: Optional[int] = None, + method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR): + """Resize image with size and max_size. + Args: + image: the image to be resized. + size: if list to tuple, resize to it. If scalar, we keep the same + aspect ratio and resize the short side to the value. + max_size: only used when size is a scalar. When the larger side is larger + than max_size after resized with size we used max_size to keep the aspect + ratio instead. + method: the method argument passed to tf.image.resize. + Returns: + the resized image and image_info to be used for downstream processing. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. It is in the format of + [[original_height, original_width], [resized_height, resized_width], + [y_scale, x_scale], [0, 0]], where [resized_height, resized_width] + is the actual scaled image size, and [y_scale, x_scale] is the + scaling factor, which is the ratio of + scaled dimension / original dimension. 
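Note: for a scalar `size`, `resize_image` scales the short side to `size` and, when `max_size` is given, first shrinks `size` so the long side never exceeds `max_size`. The scalar path in plain Python (sketch; the list/tuple path simply uses the given size reversed):

def resize_target_sketch(image_hw, size, max_size=None):
  h, w = image_hw
  if max_size is not None and max(h, w) / min(h, w) * size > max_size:
    size = int(max_size * min(h, w) / max(h, w))
  if w < h:
    return (int(size * h / w), size)
  return (size, int(size * w / h))

print(resize_target_sketch((480, 640), 800))                 # (800, 1066)
print(resize_target_sketch((480, 640), 800, max_size=1000))  # (750, 1000)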
+ """ + + def get_size_with_aspect_ratio(image_size, size, max_size=None): + h = image_size[0] + w = image_size[1] + if max_size is not None: + min_original_size = tf.cast(tf.math.minimum(w, h), dtype=tf.float32) + max_original_size = tf.cast(tf.math.maximum(w, h), dtype=tf.float32) + if max_original_size / min_original_size * size > max_size: + size = tf.cast( + tf.math.floor(max_size * min_original_size / max_original_size), + dtype=tf.int32) + else: + size = tf.cast(size, tf.int32) + + else: + size = tf.cast(size, tf.int32) + if (w <= h and w == size) or (h <= w and h == size): + return tf.stack([h, w]) + + if w < h: + ow = size + oh = tf.cast( + (tf.cast(size, dtype=tf.float32) * tf.cast(h, dtype=tf.float32) / + tf.cast(w, dtype=tf.float32)), + dtype=tf.int32) + else: + oh = size + ow = tf.cast( + (tf.cast(size, dtype=tf.float32) * tf.cast(w, dtype=tf.float32) / + tf.cast(h, dtype=tf.float32)), + dtype=tf.int32) + + return tf.stack([oh, ow]) + + def get_size(image_size, size, max_size=None): + if isinstance(size, (list, tuple)): + return size[::-1] + else: + return get_size_with_aspect_ratio(image_size, size, max_size) + + orignal_size = tf.shape(image)[0:2] + size = get_size(orignal_size, size, max_size) + rescaled_image = tf.image.resize( + image, tf.cast(size, tf.int32), method=method) + image_scale = size / orignal_size + image_info = tf.stack([ + tf.cast(orignal_size, dtype=tf.float32), + tf.cast(size, dtype=tf.float32), + tf.cast(image_scale, tf.float32), + tf.constant([0.0, 0.0], dtype=tf.float32) + ]) + return rescaled_image, image_info + + +def center_crop_image(image): + """Center crop a square shape slice from the input image. + It crops a square shape slice from the image. The side of the actual crop + is 224 / 256 = 0.875 of the short side of the original image. References: + [1] Very Deep Convolutional Networks for Large-Scale Image Recognition + https://arxiv.org/abs/1409.1556 + [2] Deep Residual Learning for Image Recognition + https://arxiv.org/abs/1512.03385 + Args: + image: a Tensor of shape [height, width, 3] representing the input image. + Returns: + cropped_image: a Tensor representing the center cropped image. + """ + with tf.name_scope('center_crop_image'): + image_size = tf.cast(tf.shape(image)[:2], dtype=tf.float32) + crop_size = ( + CENTER_CROP_FRACTION * tf.math.minimum(image_size[0], image_size[1])) + crop_offset = tf.cast((image_size - crop_size) / 2.0, dtype=tf.int32) + crop_size = tf.cast(crop_size, dtype=tf.int32) + cropped_image = image[ + crop_offset[0]:crop_offset[0] + crop_size, + crop_offset[1]:crop_offset[1] + crop_size, :] + return cropped_image + + +def center_crop_image_v2(image_bytes, image_shape): + """Center crop a square shape slice from the input image. + It crops a square shape slice from the image. The side of the actual crop + is 224 / 256 = 0.875 of the short side of the original image. References: + [1] Very Deep Convolutional Networks for Large-Scale Image Recognition + https://arxiv.org/abs/1409.1556 + [2] Deep Residual Learning for Image Recognition + https://arxiv.org/abs/1512.03385 + This is a faster version of `center_crop_image` which takes the original + image bytes and image size as the inputs, and partially decode the JPEG + bytes according to the center crop. + Args: + image_bytes: a Tensor of type string representing the raw image bytes. + image_shape: a Tensor specifying the shape of the raw image. + Returns: + cropped_image: a Tensor representing the center cropped image. 
+ """ + with tf.name_scope('center_image_crop_v2'): + image_shape = tf.cast(image_shape, tf.float32) + crop_size = ( + CENTER_CROP_FRACTION * tf.math.minimum(image_shape[0], image_shape[1])) + crop_offset = tf.cast((image_shape - crop_size) / 2.0, dtype=tf.int32) + crop_size = tf.cast(crop_size, dtype=tf.int32) + crop_window = tf.stack( + [crop_offset[0], crop_offset[1], crop_size, crop_size]) + cropped_image = tf.image.decode_and_crop_jpeg( + image_bytes, crop_window, channels=3) + return cropped_image def random_crop_image(image, @@ -497,34 +441,32 @@ def random_crop_image(image, area_range=(0.08, 1.0), max_attempts=10, seed=1): - """Randomly crop an arbitrary shaped slice from the input image. - - Args: - image: a Tensor of shape [height, width, 3] representing the input image. - aspect_ratio_range: a list of floats. The cropped area of the image must - have an aspect ratio = width / height within this range. - area_range: a list of floats. The cropped reas of the image must contain - a fraction of the input image within this range. - max_attempts: the number of attempts at generating a cropped region of the - image of the specified constraints. After max_attempts failures, return - the entire image. - seed: the seed of the random generator. - - Returns: - cropped_image: a Tensor representing the random cropped image. Can be the - original image if max_attempts is exhausted. - """ - with tf.name_scope('random_crop_image'): - crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( - tf.shape(image), - tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), - seed=seed, - min_object_covered=area_range[0], - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts) - cropped_image = tf.slice(image, crop_offset, crop_size) - return cropped_image + """Randomly crop an arbitrary shaped slice from the input image. + Args: + image: a Tensor of shape [height, width, 3] representing the input image. + aspect_ratio_range: a list of floats. The cropped area of the image must + have an aspect ratio = width / height within this range. + area_range: a list of floats. The cropped reas of the image must contain + a fraction of the input image within this range. + max_attempts: the number of attempts at generating a cropped region of the + image of the specified constraints. After max_attempts failures, return + the entire image. + seed: the seed of the random generator. + Returns: + cropped_image: a Tensor representing the random cropped image. Can be the + original image if max_attempts is exhausted. + """ + with tf.name_scope('random_crop_image'): + crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( + tf.shape(image), + tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), + seed=seed, + min_object_covered=area_range[0], + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts) + cropped_image = tf.slice(image, crop_offset, crop_size) + return cropped_image def random_crop_image_v2(image_bytes, @@ -533,223 +475,149 @@ def random_crop_image_v2(image_bytes, area_range=(0.08, 1.0), max_attempts=10, seed=1): - """Randomly crop an arbitrary shaped slice from the input image. - - This is a faster version of `random_crop_image` which takes the original - image bytes and image size as the inputs, and partially decode the JPEG - bytes according to the generated crop. - - Args: - image_bytes: a Tensor of type string representing the raw image bytes. 
- image_shape: a Tensor specifying the shape of the raw image. - aspect_ratio_range: a list of floats. The cropped area of the image must - have an aspect ratio = width / height within this range. - area_range: a list of floats. The cropped reas of the image must contain - a fraction of the input image within this range. - max_attempts: the number of attempts at generating a cropped region of the - image of the specified constraints. After max_attempts failures, return - the entire image. - seed: the seed of the random generator. - - Returns: - cropped_image: a Tensor representing the random cropped image. Can be the - original image if max_attempts is exhausted. - """ - with tf.name_scope('random_crop_image_v2'): - crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( - image_shape, - tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), - seed=seed, - min_object_covered=area_range[0], - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts) - offset_y, offset_x, _ = tf.unstack(crop_offset) - crop_height, crop_width, _ = tf.unstack(crop_size) - crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width]) - cropped_image = tf.image.decode_and_crop_jpeg( - image_bytes, crop_window, channels=3) - return cropped_image + """Randomly crop an arbitrary shaped slice from the input image. + This is a faster version of `random_crop_image` which takes the original + image bytes and image size as the inputs, and partially decode the JPEG + bytes according to the generated crop. + Args: + image_bytes: a Tensor of type string representing the raw image bytes. + image_shape: a Tensor specifying the shape of the raw image. + aspect_ratio_range: a list of floats. The cropped area of the image must + have an aspect ratio = width / height within this range. + area_range: a list of floats. The cropped reas of the image must contain + a fraction of the input image within this range. + max_attempts: the number of attempts at generating a cropped region of the + image of the specified constraints. After max_attempts failures, return + the entire image. + seed: the seed of the random generator. + Returns: + cropped_image: a Tensor representing the random cropped image. Can be the + original image if max_attempts is exhausted. + """ + with tf.name_scope('random_crop_image_v2'): + crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( + image_shape, + tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), + seed=seed, + min_object_covered=area_range[0], + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts) + offset_y, offset_x, _ = tf.unstack(crop_offset) + crop_height, crop_width, _ = tf.unstack(crop_size) + crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width]) + cropped_image = tf.image.decode_and_crop_jpeg( + image_bytes, crop_window, channels=3) + return cropped_image def resize_and_crop_boxes(boxes, image_scale, output_size, offset): - """Resizes boxes to output size with scale and offset. - - Args: - boxes: `Tensor` of shape [N, 4] representing ground truth boxes. - image_scale: 2D float `Tensor` representing scale factors that apply to - [height, width] of input image. - output_size: 2D `Tensor` or `int` representing [height, width] of target - output image size. - offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled - boxes. - - Returns: - boxes: `Tensor` of shape [N, 4] representing the scaled boxes. 
- """ - with tf.name_scope('resize_and_crop_boxes'): - # Adjusts box coordinates based on image_scale and offset. - boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]) - boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2]) - # Clips the boxes. - boxes = box_ops.clip_boxes(boxes, output_size) - return boxes - - -def resize_and_crop_masks(masks, image_scale, output_size, offset): - """Resizes boxes to output size with scale and offset. - - Args: - masks: `Tensor` of shape [N, H, W, C] representing ground truth masks. - image_scale: 2D float `Tensor` representing scale factors that apply to - [height, width] of input image. - output_size: 2D `Tensor` or `int` representing [height, width] of target - output image size. - offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled - boxes. - - Returns: - masks: `Tensor` of shape [N, H, W, C] representing the scaled masks. - """ - with tf.name_scope('resize_and_crop_masks'): - mask_size = tf.cast(tf.shape(masks)[1:3], tf.float32) - num_channels = tf.shape(masks)[3] - # Pad masks to avoid empty mask annotations. - masks = tf.concat([ - tf.zeros([1, mask_size[0], mask_size[1], num_channels], - dtype=masks.dtype), masks - ], - axis=0) - - scaled_size = tf.cast(image_scale * mask_size, tf.int32) - scaled_masks = tf.image.resize( - masks, scaled_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) - offset = tf.cast(offset, tf.int32) - scaled_masks = scaled_masks[ - :, - offset[0]:offset[0] + output_size[0], - offset[1]:offset[1] + output_size[1], - :] - - output_masks = tf.image.pad_to_bounding_box( - scaled_masks, 0, 0, output_size[0], output_size[1]) - # Remove padding. - output_masks = output_masks[1::] - return output_masks + """Resizes boxes to output size with scale and offset. + Args: + boxes: `Tensor` of shape [N, 4] representing ground truth boxes. + image_scale: 2D float `Tensor` representing scale factors that apply to + [height, width] of input image. + output_size: 2D `Tensor` or `int` representing [height, width] of target + output image size. + offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled + boxes. + Returns: + boxes: `Tensor` of shape [N, 4] representing the scaled boxes. + """ + with tf.name_scope('resize_and_crop_boxes'): + # Adjusts box coordinates based on image_scale and offset. + boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]) + boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2]) + # Clips the boxes. + boxes = box_ops.clip_boxes(boxes, output_size) + return boxes + + +def resize_and_crop_masks(masks, + image_scale, + output_size, + offset): + """Resizes boxes to output size with scale and offset. + Args: + masks: `Tensor` of shape [N, H, W, 1] representing ground truth masks. + image_scale: 2D float `Tensor` representing scale factors that apply to + [height, width] of input image. + output_size: 2D `Tensor` or `int` representing [height, width] of target + output image size. + offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled + boxes. + Returns: + masks: `Tensor` of shape [N, H, W, 1] representing the scaled masks. + """ + with tf.name_scope('resize_and_crop_masks'): + mask_size = tf.cast(tf.shape(masks)[1:3], tf.float32) + + # Pad masks to avoid empty mask annotations. 
+ masks = tf.concat([tf.zeros([1, mask_size[0], mask_size[1], 1]), masks], axis=0) + + scaled_size = tf.cast(image_scale * mask_size, tf.int32) + + scaled_masks = tf.image.resize( + masks, scaled_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + offset = tf.cast(offset, tf.int32) + scaled_masks = scaled_masks[ + :, + offset[0]:offset[0] + output_size[0], + offset[1]:offset[1] + output_size[1], + :] + output_masks = tf.image.pad_to_bounding_box( + scaled_masks, 0, 0, output_size[0], output_size[1]) + # Remove padding. + output_masks = output_masks[1::] + return output_masks def horizontal_flip_image(image): - """Flips image horizontally.""" - return tf.image.flip_left_right(image) + """Flips image horizontally.""" + return tf.image.flip_left_right(image) + + +def horizontal_flip_boxes(normalized_boxes): + """Flips normalized boxes horizontally.""" + ymin, xmin, ymax, xmax = tf.split( + value=normalized_boxes, num_or_size_splits=4, axis=1) + flipped_xmin = tf.subtract(1.0, xmax) + flipped_xmax = tf.subtract(1.0, xmin) + flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1) + return flipped_boxes def horizontal_flip_masks(masks): - """Flips masks horizontally.""" - return masks[:, :, ::-1] - - -def random_horizontal_flip( - image, normalized_boxes=None, masks=None, seed=1, prob=0.5 -): - """Randomly flips input image and bounding boxes horizontally.""" - with tf.name_scope('random_horizontal_flip'): - do_flip = tf.less(tf.random.uniform([], seed=seed), prob) - - image = tf.cond( - do_flip, - lambda: horizontal_flip_image(image), - lambda: image) - - if normalized_boxes is not None: - normalized_boxes = tf.cond( - do_flip, - lambda: horizontal_flip_boxes(normalized_boxes), - lambda: normalized_boxes) - - if masks is not None: - masks = tf.cond( - do_flip, - lambda: horizontal_flip_masks(masks), - lambda: masks) - - return image, normalized_boxes, masks - - -def random_horizontal_flip_with_roi( - image: tf.Tensor, - boxes: Optional[tf.Tensor] = None, - masks: Optional[tf.Tensor] = None, - roi_boxes: Optional[tf.Tensor] = None, - seed: int = 1 -) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor], - Optional[tf.Tensor]]: - """Randomly flips input image and bounding boxes horizontally. - - Extends preprocess_ops.random_horizontal_flip to also flip roi_boxes used - by ViLD. - - Args: - image: `tf.Tensor`, the image to apply the random flip. - boxes: `tf.Tensor` or `None`, boxes corresponding to the image. - masks: `tf.Tensor` or `None`, masks corresponding to the image. - roi_boxes: `tf.Tensor` or `None`, RoIs corresponding to the image. - seed: Seed for Tensorflow's random number generator. - - Returns: - image: `tf.Tensor`, flipped image. - boxes: `tf.Tensor` or `None`, flipped boxes corresponding to the image. - masks: `tf.Tensor` or `None`, flipped masks corresponding to the image. - roi_boxes: `tf.Tensor` or `None`, flipped RoIs corresponding to the image. 
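Note: `resize_and_crop_boxes` applies the same `image_scale` and crop `offset` recorded in `image_info` to [ymin, xmin, ymax, xmax] boxes and then clips them to the output size. The arithmetic, minus the final `box_ops.clip_boxes` call (sketch, TF 2.x assumed; numbers are an example):

import tensorflow as tf

boxes = tf.constant([[10., 20., 110., 220.]])   # [ymin, xmin, ymax, xmax]
image_scale = tf.constant([0.5, 0.5])           # [y_scale, x_scale]
offset = tf.constant([8., 16.])                 # crop offset [y0, x0]

scaled = boxes * tf.tile(image_scale[tf.newaxis, :], [1, 2])
shifted = scaled - tf.tile(offset[tf.newaxis, :], [1, 2])
print(shifted.numpy())  # [[-3. -6. 47. 94.]], then clipped to the output size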
- """ - with tf.name_scope('random_horizontal_flip'): - do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5) - - image = tf.cond(do_flip, lambda: horizontal_flip_image(image), - lambda: image) - - if boxes is not None: - boxes = tf.cond(do_flip, lambda: horizontal_flip_boxes(boxes), - lambda: boxes) - - if masks is not None: - masks = tf.cond(do_flip, lambda: horizontal_flip_masks(masks), - lambda: masks) - - if roi_boxes is not None: - roi_boxes = tf.cond(do_flip, lambda: horizontal_flip_boxes(roi_boxes), - lambda: roi_boxes) - - return image, boxes, masks, roi_boxes - - -def random_vertical_flip( - image, normalized_boxes=None, masks=None, seed=1, prob=0.5 -): - """Randomly flips input image and bounding boxes vertically.""" - with tf.name_scope('random_vertical_flip'): - do_flip = tf.less(tf.random.uniform([], seed=seed), prob) - - image = tf.cond( - do_flip, - lambda: tf.image.flip_up_down(image), - lambda: image) - - if normalized_boxes is not None: - normalized_boxes = tf.cond( - do_flip, - lambda: vertical_flip_boxes(normalized_boxes), - lambda: normalized_boxes) - - if masks is not None: - masks = tf.cond( - do_flip, - lambda: tf.image.flip_up_down(masks[..., None])[..., 0], - lambda: masks) - - return image, normalized_boxes, masks + """Flips masks horizontally.""" + return masks[:, :, ::-1] + + +def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=None): + """Randomly flips input image and bounding boxes.""" + with tf.name_scope('random_horizontal_flip'): + do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5) + + image = tf.cond( + do_flip, + lambda: horizontal_flip_image(image), + lambda: image) + + if normalized_boxes is not None: + normalized_boxes = tf.cond( + do_flip, + lambda: horizontal_flip_boxes(normalized_boxes), + lambda: normalized_boxes) + + if masks is not None: + masks = tf.cond( + do_flip, + lambda: horizontal_flip_masks(masks), + lambda: masks) + + return image, normalized_boxes, masks def color_jitter(image: tf.Tensor, @@ -757,237 +625,297 @@ def color_jitter(image: tf.Tensor, contrast: Optional[float] = 0., saturation: Optional[float] = 0., seed: Optional[int] = None) -> tf.Tensor: - """Applies color jitter to an image, similarly to torchvision`s ColorJitter. - - Args: - image (tf.Tensor): Of shape [height, width, 3] and type uint8. - brightness (float, optional): Magnitude for brightness jitter. Defaults to - 0. - contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. - saturation (float, optional): Magnitude for saturation jitter. Defaults to - 0. - seed (int, optional): Random seed. Defaults to None. - - Returns: - tf.Tensor: The augmented `image` of type uint8. - """ - image = tf.cast(image, dtype=tf.uint8) - image = random_brightness(image, brightness, seed=seed) - image = random_contrast(image, contrast, seed=seed) - image = random_saturation(image, saturation, seed=seed) - return image - - -def random_brightness(image: tf.Tensor, - brightness: float = 0., - seed: Optional[int] = None) -> tf.Tensor: - """Jitters brightness of an image. - - Args: + """Applies color jitter to an image, similarly to torchvision`s ColorJitter. + Args: image (tf.Tensor): Of shape [height, width, 3] and type uint8. brightness (float, optional): Magnitude for brightness jitter. Defaults to 0. + contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. + saturation (float, optional): Magnitude for saturation jitter. Defaults to + 0. seed (int, optional): Random seed. Defaults to None. 
- - Returns: + Returns: tf.Tensor: The augmented `image` of type uint8. - """ - assert brightness >= 0, '`brightness` must be positive' - brightness = tf.random.uniform([], - max(0, 1 - brightness), - 1 + brightness, - seed=seed, - dtype=tf.float32) - return augment.brightness(image, brightness) + """ + image = tf.cast(image, dtype=tf.uint8) + image = random_brightness(image, brightness, seed=seed) + image = random_contrast(image, contrast, seed=seed) + image = random_saturation(image, saturation, seed=seed) + return image + + +def random_brightness(image: tf.Tensor, + brightness: float = 0., + seed: Optional[int] = None) -> tf.Tensor: + """Jitters brightness of an image. + Args: + image (tf.Tensor): Of shape [height, width, 3] and type uint8. + brightness (float, optional): Magnitude for brightness jitter. Defaults to + 0. + seed (int, optional): Random seed. Defaults to None. + Returns: + tf.Tensor: The augmented `image` of type uint8. + """ + assert brightness >= 0, '`brightness` must be positive' + brightness = tf.random.uniform([], + max(0, 1 - brightness), + 1 + brightness, + seed=seed, + dtype=tf.float32) + return augment.brightness(image, brightness) def random_contrast(image: tf.Tensor, contrast: float = 0., seed: Optional[int] = None) -> tf.Tensor: - """Jitters contrast of an image, similarly to torchvision`s ColorJitter. - - Args: - image (tf.Tensor): Of shape [height, width, 3] and type uint8. - contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. - seed (int, optional): Random seed. Defaults to None. - - Returns: - tf.Tensor: The augmented `image` of type uint8. - """ - assert contrast >= 0, '`contrast` must be positive' - contrast = tf.random.uniform([], - max(0, 1 - contrast), - 1 + contrast, - seed=seed, - dtype=tf.float32) - return augment.contrast(image, contrast) + """Jitters contrast of an image, similarly to torchvision`s ColorJitter. + Args: + image (tf.Tensor): Of shape [height, width, 3] and type uint8. + contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. + seed (int, optional): Random seed. Defaults to None. + Returns: + tf.Tensor: The augmented `image` of type uint8. + """ + assert contrast >= 0, '`contrast` must be positive' + contrast = tf.random.uniform([], + max(0, 1 - contrast), + 1 + contrast, + seed=seed, + dtype=tf.float32) + return augment.contrast(image, contrast) def random_saturation(image: tf.Tensor, saturation: float = 0., seed: Optional[int] = None) -> tf.Tensor: - """Jitters saturation of an image, similarly to torchvision`s ColorJitter. - - Args: - image (tf.Tensor): Of shape [height, width, 3] and type uint8. - saturation (float, optional): Magnitude for saturation jitter. Defaults to - 0. - seed (int, optional): Random seed. Defaults to None. - - Returns: - tf.Tensor: The augmented `image` of type uint8. - """ - assert saturation >= 0, '`saturation` must be positive' - saturation = tf.random.uniform([], - max(0, 1 - saturation), - 1 + saturation, - seed=seed, - dtype=tf.float32) - return _saturation(image, saturation) + """Jitters saturation of an image, similarly to torchvision`s ColorJitter. + Args: + image (tf.Tensor): Of shape [height, width, 3] and type uint8. + saturation (float, optional): Magnitude for saturation jitter. Defaults to + 0. + seed (int, optional): Random seed. Defaults to None. + Returns: + tf.Tensor: The augmented `image` of type uint8. 
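Note: each of `random_brightness`, `random_contrast` and `random_saturation` draws a multiplicative factor uniformly from [max(0, 1 - magnitude), 1 + magnitude] and hands it to the corresponding `augment` op. A sketch of the same sampling wired to stock tf.image adjusters instead of the repo's `augment` helpers (an approximation, not the patch's exact code path):

import tensorflow as tf

def jitter_factor(magnitude, seed=None):
  # Uniform multiplicative factor around 1.0, never negative.
  return tf.random.uniform([], max(0.0, 1.0 - magnitude), 1.0 + magnitude, seed=seed)

image = tf.zeros([64, 64, 3], dtype=tf.uint8)
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.adjust_saturation(image, jitter_factor(0.4))
image = tf.image.adjust_contrast(image, jitter_factor(0.4))
print(image.shape)  # (64, 64, 3)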
+ """ + assert saturation >= 0, '`saturation` must be positive' + saturation = tf.random.uniform([], + max(0, 1 - saturation), + 1 + saturation, + seed=seed, + dtype=tf.float32) + return _saturation(image, saturation) def _saturation(image: tf.Tensor, saturation: Optional[float] = 0.) -> tf.Tensor: - return augment.blend( - tf.repeat(tf.image.rgb_to_grayscale(image), 3, axis=-1), image, - saturation) + return augment.blend( + tf.repeat(tf.image.rgb_to_grayscale(image), 3, axis=-1), image, + saturation) -def random_crop_image_with_boxes_and_labels(img, boxes, labels, min_scale, +def random_crop_image_with_boxes_and_labels(img, + boxes, + labels, + min_scale, aspect_ratio_range, - min_overlap_params, max_retry): - """Crops a random slice from the input image. - - The function will correspondingly recompute the bounding boxes and filter out - outside boxes and their labels. - - References: - [1] End-to-End Object Detection with Transformers - https://arxiv.org/abs/2005.12872 - - The preprocessing steps: - 1. Sample a minimum IoU overlap. - 2. For each trial, sample the new image width, height, and top-left corner. - 3. Compute the IoUs of bounding boxes with the cropped image and retry if - the maximum IoU is below the sampled threshold. - 4. Find boxes whose centers are in the cropped image. - 5. Compute new bounding boxes in the cropped region and only select those - boxes' labels. - - Args: - img: a 'Tensor' of shape [height, width, 3] representing the input image. - boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding - boxes with (ymin, xmin, ymax, xmax). - labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. - min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random - scale variable. - aspect_ratio_range: a list of two 'float' that specifies the lower and upper - bound of the random aspect ratio. - min_overlap_params: a list of four 'float' representing the min value, max - value, step size, and offset for the minimum overlap sample. - max_retry: an 'int' representing the number of trials for cropping. If it is - exhausted, no cropping will be performed. - - Returns: - img: a Tensor representing the random cropped image. Can be the - original image if max_retry is exhausted. - boxes: a Tensor representing the bounding boxes in the cropped image. - labels: a Tensor representing the new bounding boxes' labels. 
- """ - - shape = tf.shape(img) - original_h = shape[0] - original_w = shape[1] - - minval, maxval, step, offset = min_overlap_params - - min_overlap = tf.math.floordiv( - tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset - - min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1) - - if min_overlap > 1.0: - return img, boxes, labels - - aspect_ratio_low = aspect_ratio_range[0] - aspect_ratio_high = aspect_ratio_range[1] - - for _ in tf.range(max_retry): - scale_h = tf.random.uniform([], min_scale, 1.0) - scale_w = tf.random.uniform([], min_scale, 1.0) - new_h = tf.cast( - scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32) - new_w = tf.cast( - scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32) - - # Aspect ratio has to be in the prespecified range - aspect_ratio = new_h / new_w - if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high: - continue - - left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32) - right = left + new_w - top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32) - bottom = top + new_h - - normalized_left = tf.cast( - left, dtype=tf.float32) / tf.cast( + min_overlap_params, + max_retry): + """Crops a random slice from the input image. + The function will correspondingly recompute the bounding boxes and filter out + outside boxes and their labels. + References: + [1] End-to-End Object Detection with Transformers + https://arxiv.org/abs/2005.12872 + The preprocessing steps: + 1. Sample a minimum IoU overlap. + 2. For each trial, sample the new image width, height, and top-left corner. + 3. Compute the IoUs of bounding boxes with the cropped image and retry if + the maximum IoU is below the sampled threshold. + 4. Find boxes whose centers are in the cropped image. + 5. Compute new bounding boxes in the cropped region and only select those + boxes' labels. + Args: + img: a 'Tensor' of shape [height, width, 3] representing the input image. + boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding + boxes with (ymin, xmin, ymax, xmax). + labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. + min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random + scale variable. + aspect_ratio_range: a list of two 'float' that specifies the lower and upper + bound of the random aspect ratio. + min_overlap_params: a list of four 'float' representing the min value, max + value, step size, and offset for the minimum overlap sample. + max_retry: an 'int' representing the number of trials for cropping. If it is + exhausted, no cropping will be performed. + Returns: + img: a Tensor representing the random cropped image. Can be the + original image if max_retry is exhausted. + boxes: a Tensor representing the bounding boxes in the cropped image. + labels: a Tensor representing the new bounding boxes' labels. 
+ """ + + shape = tf.shape(img) + original_h = shape[0] + original_w = shape[1] + + minval, maxval, step, offset = min_overlap_params + + min_overlap = tf.math.floordiv( + tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset + + min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1) + + if min_overlap > 1.0: + return img, boxes, labels + + aspect_ratio_low = aspect_ratio_range[0] + aspect_ratio_high = aspect_ratio_range[1] + + for _ in tf.range(max_retry): + scale_h = tf.random.uniform([], min_scale, 1.0) + scale_w = tf.random.uniform([], min_scale, 1.0) + new_h = tf.cast( + scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32) + new_w = tf.cast( + scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32) + + # Aspect ratio has to be in the prespecified range + aspect_ratio = new_h / new_w + if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high: + continue + + left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32) + right = left + new_w + top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32) + bottom = top + new_h + + normalized_left = tf.cast( + left, dtype=tf.float32) / tf.cast( original_w, dtype=tf.float32) - normalized_right = tf.cast( - right, dtype=tf.float32) / tf.cast( + normalized_right = tf.cast( + right, dtype=tf.float32) / tf.cast( original_w, dtype=tf.float32) - normalized_top = tf.cast( - top, dtype=tf.float32) / tf.cast( + normalized_top = tf.cast( + top, dtype=tf.float32) / tf.cast( original_h, dtype=tf.float32) - normalized_bottom = tf.cast( - bottom, dtype=tf.float32) / tf.cast( + normalized_bottom = tf.cast( + bottom, dtype=tf.float32) / tf.cast( original_h, dtype=tf.float32) - cropped_box = tf.expand_dims( - tf.stack([ - normalized_top, - normalized_left, - normalized_bottom, - normalized_right, - ]), - axis=0) - iou = box_ops.bbox_overlap( - tf.expand_dims(cropped_box, axis=0), - tf.expand_dims(boxes, axis=0)) # (1, 1, n_ground_truth) - iou = tf.squeeze(iou, axis=[0, 1]) - - # If not a single bounding box has a Jaccard overlap of greater than - # the minimum, try again - if tf.reduce_max(iou) < min_overlap: - continue - - centroids = box_ops.yxyx_to_cycxhw(boxes) - mask = tf.math.logical_and( - tf.math.logical_and(centroids[:, 0] > normalized_top, - centroids[:, 0] < normalized_bottom), - tf.math.logical_and(centroids[:, 1] > normalized_left, - centroids[:, 1] < normalized_right)) - # If not a single bounding box has its center in the crop, try again. 
- if tf.reduce_sum(tf.cast(mask, dtype=tf.int32)) > 0:
- indices = tf.squeeze(tf.where(mask), axis=1)
-
- filtered_boxes = tf.gather(boxes, indices)
-
- boxes = tf.clip_by_value(
- (filtered_boxes[..., :] * tf.cast(
- tf.stack([original_h, original_w, original_h, original_w]),
- dtype=tf.float32) -
- tf.cast(tf.stack([top, left, top, left]), dtype=tf.float32)) /
- tf.cast(tf.stack([new_h, new_w, new_h, new_w]), dtype=tf.float32),
- 0.0, 1.0)
-
- img = tf.image.crop_to_bounding_box(img, top, left, bottom - top,
- right - left)
-
- labels = tf.gather(labels, indices)
- break
-
- return img, boxes, labels
+ cropped_box = tf.expand_dims(
+ tf.stack([
+ normalized_top,
+ normalized_left,
+ normalized_bottom,
+ normalized_right,
+ ]),
+ axis=0)
+ iou = box_ops.bbox_overlap(
+ tf.expand_dims(cropped_box, axis=0),
+ tf.expand_dims(boxes, axis=0)) # (1, 1, n_ground_truth)
+ iou = tf.squeeze(iou, axis=[0, 1])
+
+ # If not a single bounding box has a Jaccard overlap of greater than
+ # the minimum, try again
+ if tf.reduce_max(iou) < min_overlap:
+ continue
+
+ centroids = box_ops.yxyx_to_cycxhw(boxes)
+ mask = tf.math.logical_and(
+ tf.math.logical_and(centroids[:, 0] > normalized_top,
+ centroids[:, 0] < normalized_bottom),
+ tf.math.logical_and(centroids[:, 1] > normalized_left,
+ centroids[:, 1] < normalized_right))
+ # If not a single bounding box has its center in the crop, try again.
+ if tf.reduce_sum(tf.cast(mask, dtype=tf.int32)) > 0:
+ indices = tf.squeeze(tf.where(mask), axis=1)
+
+ filtered_boxes = tf.gather(boxes, indices)
+
+ boxes = tf.clip_by_value(
+ (filtered_boxes[..., :] * tf.cast(
+ tf.stack([original_h, original_w, original_h, original_w]),
+ dtype=tf.float32) -
+ tf.cast(tf.stack([top, left, top, left]), dtype=tf.float32)) /
+ tf.cast(tf.stack([new_h, new_w, new_h, new_w]), dtype=tf.float32),
+ 0.0, 1.0)
+
+ img = tf.image.crop_to_bounding_box(img, top, left, bottom - top,
+ right - left)
+
+ labels = tf.gather(labels, indices)
+ break
+
+ return img, boxes, labels
+
+
+def random_crop_image_masks(img,
+ masks,
+ min_scale=0.3,
+ aspect_ratio_range=(0.5, 2.0),
+ min_overlap_params=(0.0, 1.4, 0.2, 0.1),
+ max_retry=50,
+ seed=None):
+ """Randomly crop the image and masks.
+ Args:
+ img: a 'Tensor' of shape [height, width, 3] representing the input image.
+ masks: a 'Tensor' of shape [N, height, width, C] representing N masks with C channels.
+ min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random
+ scale variable.
+ aspect_ratio_range: a list of two 'float' that specifies the lower and upper
+ bound of the random aspect ratio.
+ min_overlap_params: a list of four 'float' representing the min value, max
+ value, step size, and offset for the minimum overlap sample.
+ max_retry: an 'int' representing the number of trials for cropping. If it is
+ exhausted, no cropping will be performed.
+ seed: the random number seed of int, but could be None.
+ Returns:
+ img: a Tensor representing the random cropped image. Can be the
+ original image if max_retry is exhausted.
+ masks: a Tensor representing the masks in the cropped image.
+ """ + + shape = tf.shape(img) + + original_h = shape[0] + original_w = shape[1] + + minval, maxval, step, offset = min_overlap_params + + min_overlap = tf.math.floordiv( + tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset + + min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1) + + if min_overlap > 1.0: + return img, masks + + aspect_ratio_low = aspect_ratio_range[0] + aspect_ratio_high = aspect_ratio_range[1] + + for _ in tf.range(max_retry): + scale_h = tf.random.uniform([], min_scale, 1.0) + scale_w = tf.random.uniform([], min_scale, 1.0) + new_h = tf.cast( + scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32) + new_w = tf.cast( + scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32) + + # Aspect ratio has to be in the prespecified range + aspect_ratio = new_h / new_w + if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high: + continue + + left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32) + right = left + new_w + top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32) + bottom = top + new_h + + img = tf.image.crop_to_bounding_box(img, top, left, bottom - top, right - left) + masks = tf.image.crop_to_bounding_box(masks, top, left, bottom - top, right - left) + break + + return img, masks def random_crop(image, @@ -998,36 +926,34 @@ def random_crop(image, min_overlap_params=(0.0, 1.4, 0.2, 0.1), max_retry=50, seed=None): - """Randomly crop the image and boxes, filtering labels. - - Args: - image: a 'Tensor' of shape [height, width, 3] representing the input image. - boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding - boxes with (ymin, xmin, ymax, xmax). - labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. - min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random - scale variable. - aspect_ratio_range: a list of two 'float' that specifies the lower and upper - bound of the random aspect ratio. - min_overlap_params: a list of four 'float' representing the min value, max - value, step size, and offset for the minimum overlap sample. - max_retry: an 'int' representing the number of trials for cropping. If it is - exhausted, no cropping will be performed. - seed: the random number seed of int, but could be None. - - Returns: - image: a Tensor representing the random cropped image. Can be the - original image if max_retry is exhausted. - boxes: a Tensor representing the bounding boxes in the cropped image. - labels: a Tensor representing the new bounding boxes' labels. - """ - with tf.name_scope('random_crop'): - do_crop = tf.greater(tf.random.uniform([], seed=seed), 0.5) - if do_crop: - return random_crop_image_with_boxes_and_labels(image, boxes, labels, - min_scale, - aspect_ratio_range, - min_overlap_params, - max_retry) - else: - return image, boxes, labels + """Randomly crop the image and boxes, filtering labels. + Args: + image: a 'Tensor' of shape [height, width, 3] representing the input image. + boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding + boxes with (ymin, xmin, ymax, xmax). + labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. + min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random + scale variable. + aspect_ratio_range: a list of two 'float' that specifies the lower and upper + bound of the random aspect ratio. 
+ min_overlap_params: a list of four 'float' representing the min value, max + value, step size, and offset for the minimum overlap sample. + max_retry: an 'int' representing the number of trials for cropping. If it is + exhausted, no cropping will be performed. + seed: the random number seed of int, but could be None. + Returns: + image: a Tensor representing the random cropped image. Can be the + original image if max_retry is exhausted. + boxes: a Tensor representing the bounding boxes in the cropped image. + labels: a Tensor representing the new bounding boxes' labels. + """ + with tf.name_scope('random_crop'): + do_crop = tf.greater(tf.random.uniform([], seed=seed), 0.5) + if do_crop: + return random_crop_image_with_boxes_and_labels(image, boxes, labels, + min_scale, + aspect_ratio_range, + min_overlap_params, + max_retry) + else: + return image, boxes, labels diff --git a/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc b/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc index 197cb54f..8667ae9f 100644 Binary files a/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc b/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc index 4e006a0e..490680fc 100644 Binary files a/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc b/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc index 98c62c16..6c11d2f3 100644 Binary files a/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc b/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc index 1ecf9158..5f962117 100644 Binary files a/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc b/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc index 11644322..afccd89b 100644 Binary files a/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc b/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc index 75452377..84d853c1 100644 Binary files a/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc differ diff --git a/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc b/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc index 3d37eedd..075df204 100644 Binary files a/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc 
index c6430325..b41932be 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc index 6907b982..d6ca6287 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc index fcc339b6..33d752cc 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc index ce9146e4..2b8bb464 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc index 973a3631..361e44b8 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc index d2ae1c32..d31b4929 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc index fb62b127..ae07c0f8 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc index cd77de6e..2134bc42 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc index 199d5727..203cf1bf 100644 Binary files 
a/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/__init__.cpython-38.pyc b/models/orbit/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..7e2dda99 Binary files /dev/null and b/models/orbit/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/__init__.cpython-39.pyc b/models/orbit/__pycache__/__init__.cpython-39.pyc index 8a220520..d9c50c4a 100644 Binary files a/models/orbit/__pycache__/__init__.cpython-39.pyc and b/models/orbit/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/controller.cpython-38.pyc b/models/orbit/__pycache__/controller.cpython-38.pyc new file mode 100644 index 00000000..7b192fb4 Binary files /dev/null and b/models/orbit/__pycache__/controller.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/controller.cpython-39.pyc b/models/orbit/__pycache__/controller.cpython-39.pyc index 0c7592d2..0295d785 100644 Binary files a/models/orbit/__pycache__/controller.cpython-39.pyc and b/models/orbit/__pycache__/controller.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/runner.cpython-38.pyc b/models/orbit/__pycache__/runner.cpython-38.pyc new file mode 100644 index 00000000..d899b0b5 Binary files /dev/null and b/models/orbit/__pycache__/runner.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/runner.cpython-39.pyc b/models/orbit/__pycache__/runner.cpython-39.pyc index c455c86f..79c70d58 100644 Binary files a/models/orbit/__pycache__/runner.cpython-39.pyc and b/models/orbit/__pycache__/runner.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/standard_runner.cpython-38.pyc b/models/orbit/__pycache__/standard_runner.cpython-38.pyc new file mode 100644 index 00000000..79911b09 Binary files /dev/null and b/models/orbit/__pycache__/standard_runner.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/standard_runner.cpython-39.pyc b/models/orbit/__pycache__/standard_runner.cpython-39.pyc index 32dd1425..9c751972 100644 Binary files a/models/orbit/__pycache__/standard_runner.cpython-39.pyc and b/models/orbit/__pycache__/standard_runner.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/__init__.cpython-38.pyc b/models/orbit/actions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..9cb968cc Binary files /dev/null and b/models/orbit/actions/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/__init__.cpython-39.pyc b/models/orbit/actions/__pycache__/__init__.cpython-39.pyc index 19216db4..14ee98fc 100644 Binary files a/models/orbit/actions/__pycache__/__init__.cpython-39.pyc and b/models/orbit/actions/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/conditional_action.cpython-38.pyc b/models/orbit/actions/__pycache__/conditional_action.cpython-38.pyc new file mode 100644 index 00000000..b92edbb7 Binary files /dev/null and b/models/orbit/actions/__pycache__/conditional_action.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc b/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc index 01031177..2bd898d2 100644 Binary files a/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc and b/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc differ diff --git 
a/models/orbit/actions/__pycache__/export_saved_model.cpython-38.pyc b/models/orbit/actions/__pycache__/export_saved_model.cpython-38.pyc new file mode 100644 index 00000000..c4e8662c Binary files /dev/null and b/models/orbit/actions/__pycache__/export_saved_model.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc b/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc index de695fe4..fcecd422 100644 Binary files a/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc and b/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/new_best_metric.cpython-38.pyc b/models/orbit/actions/__pycache__/new_best_metric.cpython-38.pyc new file mode 100644 index 00000000..8b97b036 Binary files /dev/null and b/models/orbit/actions/__pycache__/new_best_metric.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc b/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc index 27cb7016..bec62a9f 100644 Binary files a/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc and b/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-38.pyc b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-38.pyc new file mode 100644 index 00000000..f0f01d76 Binary files /dev/null and b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc index 5d16fb02..67ed9900 100644 Binary files a/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc and b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc differ diff --git a/models/orbit/controller.py b/models/orbit/controller.py index 1f277231..054b9007 100644 --- a/models/orbit/controller.py +++ b/models/orbit/controller.py @@ -481,6 +481,7 @@ def _train_n_steps(self, num_steps: int): should_record = lambda: (self.global_step % self.summary_interval == 0) with tf.summary.record_if(should_record): num_steps_tensor = tf.convert_to_tensor(num_steps, dtype=tf.int32) + train_output = self.trainer.train(num_steps_tensor) # Verify that global_step was updated properly, then update current_step. 
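
Note on the augmentation changes earlier in this diff: the snippet below is a minimal usage sketch, not part of the patch. It assumes the touched crop functions live in official/vision/ops/preprocess_ops.py (their upstream location), that random_crop keeps its upstream defaults (min_scale=0.3, aspect_ratio_range=(0.5, 2.0)), and that the new random_crop_image_masks is exported from the same module; the tensor shapes and labels are dummies chosen only for illustration.

# Hypothetical usage sketch (not part of this patch). Assumes the crop
# functions sit in official/vision/ops/preprocess_ops.py and that
# random_crop keeps its upstream defaults; shapes and labels are dummies.
import tensorflow as tf

from official.vision.ops import preprocess_ops

image = tf.random.uniform([480, 640, 3], dtype=tf.float32)
# Normalized (ymin, xmin, ymax, xmax) ground-truth boxes and their labels.
boxes = tf.constant([[0.1, 0.1, 0.5, 0.4],
                     [0.3, 0.6, 0.9, 0.95]], dtype=tf.float32)
labels = tf.constant([1, 7], dtype=tf.int32)

# Half of the time random_crop returns a DETR-style min-IoU crop with the
# boxes re-normalized to the crop window and out-of-crop boxes/labels
# dropped; otherwise it returns the inputs unchanged.
image_c, boxes_c, labels_c = preprocess_ops.random_crop(image, boxes, labels)

# The new mask variant crops the image and an [N, H, W, C] stack of masks
# with the same randomly sampled window; there is no IoU filtering step
# because no boxes are involved.
masks = tf.cast(
    tf.random.uniform([2, 480, 640, 1], maxval=2, dtype=tf.int32), tf.float32)
image_m, masks_m = preprocess_ops.random_crop_image_masks(
    image, masks, min_scale=0.3, aspect_ratio_range=(0.5, 2.0))

The only behavioral difference between the two paths is the scoring: the box version retries until the sampled minimum IoU and the center-in-crop test are satisfied, while the mask version accepts the first window whose aspect ratio falls in range.
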
diff --git a/models/orbit/utils/__pycache__/__init__.cpython-38.pyc b/models/orbit/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..ef2b9c08 Binary files /dev/null and b/models/orbit/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/__init__.cpython-39.pyc b/models/orbit/utils/__pycache__/__init__.cpython-39.pyc index e78ca24c..9bc676c5 100644 Binary files a/models/orbit/utils/__pycache__/__init__.cpython-39.pyc and b/models/orbit/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/common.cpython-38.pyc b/models/orbit/utils/__pycache__/common.cpython-38.pyc new file mode 100644 index 00000000..30cf3e1f Binary files /dev/null and b/models/orbit/utils/__pycache__/common.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/common.cpython-39.pyc b/models/orbit/utils/__pycache__/common.cpython-39.pyc index 0fbdd179..ed758944 100644 Binary files a/models/orbit/utils/__pycache__/common.cpython-39.pyc and b/models/orbit/utils/__pycache__/common.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/epoch_helper.cpython-38.pyc b/models/orbit/utils/__pycache__/epoch_helper.cpython-38.pyc new file mode 100644 index 00000000..eb8f702d Binary files /dev/null and b/models/orbit/utils/__pycache__/epoch_helper.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc b/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc index 9c55ca26..a947937b 100644 Binary files a/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc and b/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/loop_fns.cpython-38.pyc b/models/orbit/utils/__pycache__/loop_fns.cpython-38.pyc new file mode 100644 index 00000000..7f28861c Binary files /dev/null and b/models/orbit/utils/__pycache__/loop_fns.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc b/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc index 81826598..d295ae07 100644 Binary files a/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc and b/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager.cpython-38.pyc b/models/orbit/utils/__pycache__/summary_manager.cpython-38.pyc new file mode 100644 index 00000000..e5f0288c Binary files /dev/null and b/models/orbit/utils/__pycache__/summary_manager.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc b/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc index eb04aeb8..d84a14c9 100644 Binary files a/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc and b/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager_interface.cpython-38.pyc b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-38.pyc new file mode 100644 index 00000000..1913b1af Binary files /dev/null and b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc index 0d31a863..813ecf16 100644 Binary files a/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc and b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc differ diff --git 
a/models/orbit/utils/__pycache__/tpu_summaries.cpython-38.pyc b/models/orbit/utils/__pycache__/tpu_summaries.cpython-38.pyc new file mode 100644 index 00000000..7999d962 Binary files /dev/null and b/models/orbit/utils/__pycache__/tpu_summaries.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc b/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc index c540b0f2..bb5fb0b7 100644 Binary files a/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc and b/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc differ diff --git a/params.yaml b/params.yaml new file mode 100644 index 00000000..7503c753 --- /dev/null +++ b/params.yaml @@ -0,0 +1,187 @@ +runtime: + all_reduce_alg: null + batchnorm_spatial_persistent: false + dataset_num_private_threads: null + default_shard_dim: -1 + distribution_strategy: mirrored + enable_xla: false + gpu_thread_mode: null + loss_scale: null + mixed_precision_dtype: null + num_cores_per_replica: 1 + num_gpus: 0 + num_packs: 1 + per_gpu_thread_count: 0 + run_eagerly: false + task_index: -1 + tpu: null + tpu_enable_xla_dynamic_padder: null + worker_hosts: null +task: + allow_image_summary: false + annotation_file: null + differential_privacy_config: null + init_checkpoint: '' + init_checkpoint_modules: backbone + losses: + background_cls_weight: 0.1 + class_offset: 0 + l2_weight_decay: 0.0001 + lambda_box: 5.0 + lambda_cls: 1.0 + lambda_giou: 2.0 + model: + backbone: + resnet: + bn_trainable: false + depth_multiplier: 1.0 + model_id: 50 + replace_stem_max_pool: false + resnetd_shortcut: false + scale_stem: true + se_ratio: 0.0 + stem_type: v0 + stochastic_depth_drop_rate: 0.0 + type: resnet + backbone_endpoint_name: '5' + hidden_size: 256 + input_size: [1333, 1333, 3] + norm_activation: + activation: relu + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + num_classes: 81 + num_decoder_layers: 6 + num_encoder_layers: 6 + num_queries: 100 + name: null + per_category_metrics: false + train_data: + apply_tf_data_service_before_batching: false + block_length: 1 + cache: false + cycle_length: null + deterministic: null + drop_remainder: true + enable_shared_tf_data_service_between_parallel_trainers: false + enable_tf_data_service: false + global_batch_size: 64 + input_path: '' + is_training: true + max_num_boxes: 100 + output_size: !!python/tuple + - 1333 + - 1333 + prefetch_buffer_size: null + resize_scales: !!python/tuple + - 480 + - 512 + - 544 + - 576 + - 608 + - 640 + - 672 + - 704 + - 736 + - 768 + - 800 + seed: null + sharding: true + shuffle_buffer_size: 1000 + tf_data_service_address: null + tf_data_service_job_name: null + tfds_as_supervised: false + tfds_data_dir: '' + tfds_name: coco/2017 + tfds_skip_decoding_feature: '' + tfds_split: train + trainer_id: null + validation_data: + apply_tf_data_service_before_batching: false + block_length: 1 + cache: false + cycle_length: null + deterministic: null + drop_remainder: false + enable_shared_tf_data_service_between_parallel_trainers: false + enable_tf_data_service: false + global_batch_size: 64 + input_path: '' + is_training: false + max_num_boxes: 100 + output_size: !!python/tuple + - 1333 + - 1333 + prefetch_buffer_size: null + resize_scales: !!python/tuple + - 480 + - 512 + - 544 + - 576 + - 608 + - 640 + - 672 + - 704 + - 736 + - 768 + - 800 + seed: null + sharding: true + shuffle_buffer_size: 100 + tf_data_service_address: null + tf_data_service_job_name: null + tfds_as_supervised: false + tfds_data_dir: '' + tfds_name: coco/2017 + 
tfds_skip_decoding_feature: '' + tfds_split: validation + trainer_id: null +trainer: + allow_tpu_summary: false + best_checkpoint_eval_metric: AP + best_checkpoint_export_subdir: best_ckpt + best_checkpoint_metric_comp: higher + checkpoint_interval: 10000 + continuous_eval_timeout: 3600 + eval_tf_function: true + eval_tf_while_loop: false + loss_upper_bound: 1000000.0 + max_to_keep: 1 + optimizer_config: + ema: null + learning_rate: + stepwise: + boundaries: [369600] + name: PiecewiseConstantDecay + offset: 0 + values: [0.0001, 1.0e-05] + type: stepwise + optimizer: + detr_adamw: + amsgrad: false + beta_1: 0.9 + beta_2: 0.999 + clipnorm: null + clipvalue: null + epsilon: 1.0e-07 + exclude_from_weight_decay: null + global_clipnorm: 0.1 + gradient_clip_norm: 0.0 + include_in_weight_decay: null + name: AdamWeightDecay + weight_decay_rate: 0.0001 + type: detr_adamw + warmup: + type: null + preemption_on_demand_checkpoint: true + recovery_begin_steps: 0 + recovery_max_trials: 0 + steps_per_loop: 10000 + summary_interval: 10000 + train_steps: 554400 + train_tf_function: true + train_tf_while_loop: true + validation_interval: 10000 + validation_steps: -1 + validation_summary_subdir: validation
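
Note on the new params.yaml above: as a quick sanity check of the schedule it encodes, the sketch below reads the file with plain PyYAML. This is an illustration only; the Model Garden normally materializes this configuration through its own hyperparams machinery, the !!python/tuple tags require a loader that can build Python tuples (UnsafeLoader here, so only for trusted files), and the COCO 2017 train size (about 118,287 images) is an external figure used purely for the arithmetic.

# Illustration only: inspect params.yaml with plain PyYAML.
import yaml

# UnsafeLoader is needed because the file uses !!python/tuple tags,
# which SafeLoader will not construct; use it on trusted files only.
with open('params.yaml') as f:
    params = yaml.load(f, Loader=yaml.UnsafeLoader)

batch_size = params['task']['train_data']['global_batch_size']   # 64
train_steps = params['trainer']['train_steps']                    # 554400
lr_boundary = params['trainer']['optimizer_config'][
    'learning_rate']['stepwise']['boundaries'][0]                 # 369600

# COCO 2017 train has about 118,287 images, so 64 * 554400 / 118287 is
# roughly 300 epochs, with the learning-rate drop near epoch 200,
# which matches the standard DETR training recipe.
print(batch_size, train_steps, lr_boundary,
      batch_size * train_steps / 118287)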