diff --git a/category_mask.npy b/category_mask.npy
new file mode 100644
index 00000000..1ad094d5
Binary files /dev/null and b/category_mask.npy differ
diff --git a/img.npy b/img.npy
new file mode 100644
index 00000000..72ca0b47
Binary files /dev/null and b/img.npy differ
diff --git a/individual_masks.npy b/individual_masks.npy
new file mode 100644
index 00000000..65900a4d
Binary files /dev/null and b/individual_masks.npy differ
diff --git a/instance_mask.npy b/instance_mask.npy
new file mode 100644
index 00000000..a9a691dc
Binary files /dev/null and b/instance_mask.npy differ

[Binary diffs not shown: dozens of compiled Python caches under models/official/**/__pycache__/ (*.cpython-38.pyc added, *.cpython-39.pyc updated) also change throughout this patch.]

diff --git a/models/official/common/distribute_utils.py b/models/official/common/distribute_utils.py
index 58d63038..19d417be 100644
--- a/models/official/common/distribute_utils.py
+++ b/models/official/common/distribute_utils.py
@@ -82,8 +82,9 @@ def tpu_initialize(tpu_address):
   Returns:
     A TPUClusterResolver.
   """
+
   cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
-      tpu=tpu_address)
+      tpu=tpu_address, project=os.environ["TPU_PROJECT"], zone=os.environ["TPU_ZONE"])
   if tpu_address not in ("", "local"):
     tf.config.experimental_connect_to_cluster(cluster_resolver)
     tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
diff --git a/models/official/core/actions.py b/models/official/core/actions.py
index 5a092b8a..7b4f4195 100644
--- a/models/official/core/actions.py
+++ b/models/official/core/actions.py
@@ -222,15 +222,15 @@ def get_train_actions(
     )
     train_actions.append(recover_action)

-  if (
-      params.trainer.preemption_on_demand_checkpoint
-      and trainer.strategy.cluster_resolver
-  ):
-    on_demand_checkpoint_action = orbit.actions.SaveCheckpointIfPreempted(
-        trainer.strategy.cluster_resolver,
-        checkpoint_manager,
-        trainer.global_step,
-        keep_running_after_save=True,
-    )
-    train_actions.append(on_demand_checkpoint_action)
+  # if (
+  #     params.trainer.preemption_on_demand_checkpoint
+  #     and trainer.strategy.cluster_resolver
+  # ):
+  #   on_demand_checkpoint_action = orbit.actions.SaveCheckpointIfPreempted(
+  #       trainer.strategy.cluster_resolver,
+  #       checkpoint_manager,
+  #       trainer.global_step,
+  #       keep_running_after_save=True,
+  #   )
+  #   train_actions.append(on_demand_checkpoint_action)

   return train_actions
diff --git a/models/official/core/base_trainer.py b/models/official/core/base_trainer.py
index a341d128..9d31ce2d 100644
--- a/models/official/core/base_trainer.py
+++ b/models/official/core/base_trainer.py
@@ -335,6 +335,7 @@ def train_loop_end(self):
     # Maybe a self-implemented optimizer does not have `optimizer.iterations`.
     # So just to be safe here.
     if hasattr(self.optimizer, "iterations"):
+
       logs["learning_rate"] = self.optimizer.learning_rate(
           self.optimizer.iterations)
     else:
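The base_trainer.py hunk touches the line that logs the learning rate by calling `self.optimizer.learning_rate` with the current step, which works when the optimizer was built with a `tf.keras.optimizers.schedules.LearningRateSchedule`. A small sketch of that pattern; the schedule and values here are illustrative, not taken from this diff:

import tensorflow as tf

schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[369600], values=[1e-4, 1e-5])  # a stepwise schedule, as in the DETR script below
optimizer = tf.keras.optimizers.Adam(learning_rate=schedule)

# A LearningRateSchedule is callable with a step; evaluating it at the
# optimizer's current iteration is what train_loop_end() records.
current_lr = schedule(optimizer.iterations)
print(float(current_lr))  # 1e-4 until step 369600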
diff --git a/models/official/projects/detr/dataloaders/coco.py b/models/official/projects/detr/dataloaders/coco.py
index cf0835b8..54cf4bb8 100644
--- a/models/official/projects/detr/dataloaders/coco.py
+++ b/models/official/projects/detr/dataloaders/coco.py
@@ -116,7 +116,7 @@ def preprocess(self, inputs):
                 classes, self._params.max_num_boxes),
         'boxes':
             preprocess_ops.clip_or_pad_to_fixed_size(
-                boxes, self._params.max_num_boxes)
+                boxes, self._params.max_num_boxes)  # [4, 100]
     }
     if not self._params.is_training:
       labels.update({
diff --git a/models/official/projects/detr/experiments/detr_r50_300epochs.sh b/models/official/projects/detr/experiments/detr_r50_300epochs.sh
index 162f9743..8fb91a63 100644
--- a/models/official/projects/detr/experiments/detr_r50_300epochs.sh
+++ b/models/official/projects/detr/experiments/detr_r50_300epochs.sh
@@ -2,5 +2,6 @@
 python3 official/projects/detr/train.py \
   --experiment=detr_coco \
   --mode=train_and_eval \
-  --model_dir=/tmp/logging_dir/ \
+  --model_dir="./" \
   --params_override=task.init_checkpoint='gs://tf_model_garden/vision/resnet50_imagenet/ckpt-62400',trainer.train_steps=554400,trainer.optimizer_config.learning_rate.stepwise.boundaries="[369600]"
+
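For context on the `# [4, 100]` note in the coco.py hunk above: `preprocess_ops.clip_or_pad_to_fixed_size` truncates or zero-pads the box tensor along its first axis so every example carries exactly `max_num_boxes` rows. A rough, illustrative re-implementation (not the model-garden helper itself):

import tensorflow as tf

def clip_or_pad_to_fixed_size(t, size, constant_values=0):
  # Keep at most `size` rows, then pad with `constant_values` up to `size`.
  t = t[:size]
  pad_rows = size - tf.shape(t)[0]
  paddings = [[0, pad_rows]] + [[0, 0]] * (len(t.shape) - 1)
  return tf.pad(t, paddings, constant_values=constant_values)

boxes = tf.random.uniform([7, 4])               # 7 ground-truth boxes
padded = clip_or_pad_to_fixed_size(boxes, 100)
print(padded.shape)                             # (100, 4)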
""" + _, num_elems, _ = tf_utils.get_shape_list(adj_matrix, expected_rank=3) adj_matrix = tf.transpose(adj_matrix, [1, 0, 2]) diff --git a/models/official/projects/detr/ops/matchers_test.py b/models/official/projects/detr/ops/matchers_test.py index 09b12e1f..87e27430 100644 --- a/models/official/projects/detr/ops/matchers_test.py +++ b/models/official/projects/detr/ops/matchers_test.py @@ -18,9 +18,8 @@ from scipy import optimize import tensorflow as tf -from official.projects.detr.ops import matchers - - +# from official.projects.detr.ops import matchers +import matchers class MatchersOpsTest(tf.test.TestCase): def testLinearSumAssignment(self): diff --git a/models/official/projects/detr/tasks/__pycache__/detection.cpython-39.pyc b/models/official/projects/detr/tasks/__pycache__/detection.cpython-39.pyc new file mode 100644 index 00000000..b4aac805 Binary files /dev/null and b/models/official/projects/detr/tasks/__pycache__/detection.cpython-39.pyc differ diff --git a/models/official/projects/detr/tasks/detection.py b/models/official/projects/detr/tasks/detection.py index 55806d0c..b247df0a 100644 --- a/models/official/projects/detr/tasks/detection.py +++ b/models/official/projects/detr/tasks/detection.py @@ -152,7 +152,7 @@ def _compute_cost(self, cls_outputs, box_outputs, cls_targets, box_targets): tf.cast(tf.not_equal(cls_targets, 0), dtype=total_cost.dtype), axis=1) total_cost = (1 - valid) * max_cost + valid * total_cost - # Set inf of nan to large constant + # Set inf or nan to large constant total_cost = tf.where( tf.logical_or(tf.math.is_nan(total_cost), tf.math.is_inf(total_cost)), max_cost * tf.ones_like(total_cost, dtype=total_cost.dtype), @@ -176,7 +176,7 @@ def build_losses(self, outputs, labels, aux_losses=None): target_index = tf.math.argmax(indices, axis=1) cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1) box_assigned = tf.gather(box_outputs, target_index, batch_dims=1, axis=1) - + background = tf.equal(cls_targets, 0) num_boxes = tf.reduce_sum( tf.cast(tf.logical_not(background), tf.float32), axis=-1) diff --git a/models/official/projects/maskformer/.gitignore b/models/official/projects/maskformer/.gitignore new file mode 100644 index 00000000..d40353df --- /dev/null +++ b/models/official/projects/maskformer/.gitignore @@ -0,0 +1,4 @@ +.gitignore +myreadme.md +ckpts/ +.npy \ No newline at end of file diff --git a/models/official/projects/maskformer/.ipynb_checkpoints/testing-checkpoint.ipynb b/models/official/projects/maskformer/.ipynb_checkpoints/testing-checkpoint.ipynb new file mode 100644 index 00000000..7fec5150 --- /dev/null +++ b/models/official/projects/maskformer/.ipynb_checkpoints/testing-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/models/official/projects/maskformer/__pycache__/__init__.cpython-38.pyc b/models/official/projects/maskformer/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..55d3bd5d Binary files /dev/null and b/models/official/projects/maskformer/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc index 00b49200..cbfe52ca 100644 Binary files a/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/__pycache__/__init__.cpython-39.pyc differ diff --git 
a/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc b/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc new file mode 100644 index 00000000..99998d36 Binary files /dev/null and b/models/official/projects/maskformer/__pycache__/optimization.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/configs/.ipynb_checkpoints/maskformer-checkpoint.py b/models/official/projects/maskformer/configs/.ipynb_checkpoints/maskformer-checkpoint.py new file mode 100644 index 00000000..8338d6c8 --- /dev/null +++ b/models/official/projects/maskformer/configs/.ipynb_checkpoints/maskformer-checkpoint.py @@ -0,0 +1,212 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MaskFormer configurations.""" + +import dataclasses +import os +from typing import List, Optional, Union + +from official.core import config_definitions as cfg +from official.core import exp_factory +from official.modeling import hyperparams +from official.vision.configs import backbones +from official.vision.configs import common +from official.projects.maskformer import optimization + + +@dataclasses.dataclass +class Parser(hyperparams.Config): + """Config definitions for parser""" + output_size: List[int] = None + min_scale: float = 0.3 + aspect_ratio_range: List[float] = (0.5, 2.0) + min_overlap_params: List[float] = (0.0, 1.4, 0.2, 0.1) + max_retry: int = 50 + pad_output: bool = False + resize_eval_groundtruth: bool = True + groundtruth_padded_size: Optional[List[int]] = None + ignore_label: int = 0 + aug_rand_hflip: bool = True + aug_scale_min: float = 1.0 + aug_scale_max: float = 1.0 + color_aug_ssd: bool = False + brightness: float = 0.2 + saturation: float = 0.3 + contrast: float = 0.5 + aug_type: Optional[common.Augmentation] = None + sigma: float = 8.0 + small_instance_area_threshold: int = 4096 + small_instance_weight: float = 3.0 + dtype: str = 'float32' + seed: int = None + +@dataclasses.dataclass +class DataConfig(cfg.DataConfig): + """Input config for training.""" + input_path: str = '' + tfds_name: str = '' + tfds_split: str = 'train' + global_batch_size: int = 0 + is_training: bool = False + regenerate_source_id: bool = False + # TODO : Change the dtype to bloat16 for TPU training + dtype: str = 'bfloat16' + decoder: common.DataDecoder = common.DataDecoder() + shuffle_buffer_size: int = 10000 + file_type: str = 'tfrecord' + drop_remainder: bool = True + parser: Parser = Parser() + + +@dataclasses.dataclass +class Losses(hyperparams.Config): + # TODO update these for maskformer + class_offset: int = 0 + lambda_cls: float = 1.0 + lambda_box: float = 5.0 + lambda_giou: float = 2.0 + background_cls_weight: float = 0.1 + l2_weight_decay: float = 1e-4 + + +@dataclasses.dataclass +class MaskFormer(hyperparams.Config): + # TODO update these for maskformer + """MaskFormer model definations.""" + num_queries: int = 100 + hidden_size: int = 256 + # TODO: Actually there are 133 classes for panoptic 
segmentation + num_classes: int = 133 # 0: background + num_encoder_layers: int = 6 + num_decoder_layers: int = 6 + input_size: List[int] = dataclasses.field(default_factory=list) + backbone: backbones.Backbone = backbones.Backbone( + type='resnet', resnet=backbones.ResNet(model_id=50, bn_trainable=False)) + norm_activation: common.NormActivation = common.NormActivation() + backbone_endpoint_name: str = '5' + + +@dataclasses.dataclass +class MaskFormerTask(cfg.TaskConfig): + model: MaskFormer = MaskFormer() + train_data: cfg.DataConfig = cfg.DataConfig() + validation_data: cfg.DataConfig = cfg.DataConfig() + losses: Losses = Losses() + init_checkpoint: Optional[str] = None + init_checkpoint_modules: Union[str, List[str]] = 'all' # all, backbone + annotation_file: Optional[str] = None + per_category_metrics: bool = False + +# TODO : we should pass this via cmd +# COCO_INPUT_PATH_BASE = '/depot/davisjam/data/vishal/datasets/coco/' +COCO_INPUT_PATH_BASE = 'gs://cam2-datasets/coco_panoptic/' +COCO_TRAIN_EXAMPLES = 118287 +COCO_VAL_EXAMPLES = 5000 + + +@exp_factory.register_config_factory('maskformer_coco_panoptic') +def maskformer_coco_panoptic() -> cfg.ExperimentConfig: + """Config to get results that matches the paper.""" + train_batch_size = 8 + eval_batch_size = 8 + steps_per_epoch = 100 +# steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + train_steps = 300 * steps_per_epoch # 300 epochs + decay_at = train_steps - 100 * steps_per_epoch # 200 epochs + config = cfg.ExperimentConfig( + task=MaskFormerTask( + init_checkpoint='', + init_checkpoint_modules='backbone', + annotation_file=os.path.join(COCO_INPUT_PATH_BASE,'annotations' + 'instances_train2017.json'), + model=MaskFormer( + input_size=[640, 640, 3], + norm_activation=common.NormActivation()), + losses=Losses(), + train_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/train*'), + is_training=True, + global_batch_size=train_batch_size, + shuffle_buffer_size=1000, + parser = Parser( + output_size = [640,640], + min_scale = 0.3, + aspect_ratio_range = (0.5, 2.0), + min_overlap_params = (0.0, 1.4, 0.2, 0.1), + max_retry = 50, + pad_output = False, + resize_eval_groundtruth = True, + groundtruth_padded_size = None, + ignore_label = 0, + aug_rand_hflip = True, + aug_scale_min = 1.0, + aug_scale_max = 1.0, + color_aug_ssd = False, + brightness = 0.2, + saturation = 0.3, + contrast = 0.5, + # TODO choose appropriate augmentation + aug_type = None, + sigma = 8.0, + small_instance_area_threshold = 4096, + small_instance_weight = 3.0, + dtype = 'bfloat16', + seed = 2045, + ) + ), + validation_data=DataConfig( + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/val*'), + is_training=False, + global_batch_size=eval_batch_size, + drop_remainder=False, + parser = Parser( + output_size = [640,640], + pad_output = True, + seed = 4096, + ) + + )), + trainer=cfg.TrainerConfig( + train_steps=train_steps, + validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, + steps_per_loop=steps_per_epoch, + summary_interval=steps_per_epoch, + checkpoint_interval=steps_per_epoch, + validation_interval= 5 * steps_per_epoch, + max_to_keep=1, + best_checkpoint_export_subdir='best_ckpt', + # TODO: Not defined the metric + optimizer_config=optimization.OptimizationConfig({ + 'optimizer': { + 'type': 'detr_adamw', + 'detr_adamw': { + 'weight_decay_rate': 1e-4, + 'global_clipnorm': 0.1, + # Avoid AdamW legacy behavior. 
+ 'gradient_clip_norm': 0.0 + } + }, + 'learning_rate': { + 'type': 'stepwise', + 'stepwise': { + 'boundaries': [decay_at], + 'values': [0.0001, 1.0e-05] + } + }, + })), + restrictions=[ + 'task.train_data.is_training != None', + ]) + return config diff --git a/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc b/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc index a87ae537..eab1e3d1 100644 Binary files a/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc and b/models/official/projects/maskformer/configs/__pycache__/maskformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/configs/maskformer.py b/models/official/projects/maskformer/configs/maskformer.py index 7844e0b8..024edf59 100644 --- a/models/official/projects/maskformer/configs/maskformer.py +++ b/models/official/projects/maskformer/configs/maskformer.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""DETR configurations.""" +"""MaskFormer configurations.""" import dataclasses import os @@ -21,10 +21,9 @@ from official.core import config_definitions as cfg from official.core import exp_factory from official.modeling import hyperparams -# from official.projects.detr import optimization -# from official.projects.detr.dataloaders import coco from official.vision.configs import backbones from official.vision.configs import common +from official.projects.maskformer import optimization @dataclasses.dataclass @@ -62,6 +61,7 @@ class DataConfig(cfg.DataConfig): global_batch_size: int = 0 is_training: bool = False regenerate_source_id: bool = False + # TODO : Change the dtype to bloat16 for TPU training dtype: str = 'bfloat16' decoder: common.DataDecoder = common.DataDecoder() shuffle_buffer_size: int = 10000 @@ -79,6 +79,10 @@ class Losses(hyperparams.Config): lambda_giou: float = 2.0 background_cls_weight: float = 0.1 l2_weight_decay: float = 1e-4 + cost_class = 1.0 + cost_dice = 1.0 + cost_focal = 20.0 + no_object_weight = .1 @dataclasses.dataclass @@ -87,8 +91,10 @@ class MaskFormer(hyperparams.Config): """MaskFormer model definations.""" num_queries: int = 100 hidden_size: int = 256 - num_classes: int = 91 # 0: background - num_encoder_layers: int = 6 + # TODO: Actually there are 133 classes for panoptic segmentation + num_classes: int = 133 # 0: background + fpn_encoder_layers: int = 6 + detr_encoder_layers: int = 0 num_decoder_layers: int = 6 input_size: List[int] = dataclasses.field(default_factory=list) backbone: backbones.Backbone = backbones.Backbone( @@ -108,7 +114,9 @@ class MaskFormerTask(cfg.TaskConfig): annotation_file: Optional[str] = None per_category_metrics: bool = False -COCO_INPUT_PATH_BASE = '/depot/davisjam/data/vishal/datasets/coco' +# TODO : we should pass this via cmd +# COCO_INPUT_PATH_BASE = '/depot/davisjam/data/vishal/datasets/coco/' +COCO_INPUT_PATH_BASE = 'gs://cam2-datasets/coco_panoptic/' COCO_TRAIN_EXAMPLES = 118287 COCO_VAL_EXAMPLES = 5000 @@ -116,32 +124,64 @@ class MaskFormerTask(cfg.TaskConfig): @exp_factory.register_config_factory('maskformer_coco_panoptic') def maskformer_coco_panoptic() -> cfg.ExperimentConfig: """Config to get results that matches the paper.""" - train_batch_size = 64 - eval_batch_size = 64 - steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size + train_batch_size = 8 + eval_batch_size = 8 + steps_per_epoch = 100 +# steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size train_steps 
= 300 * steps_per_epoch # 300 epochs decay_at = train_steps - 100 * steps_per_epoch # 200 epochs config = cfg.ExperimentConfig( task=MaskFormerTask( init_checkpoint='', init_checkpoint_modules='backbone', - annotation_file=os.path.join(COCO_INPUT_PATH_BASE, - 'instances_val2017.json'), + annotation_file=os.path.join(COCO_INPUT_PATH_BASE,'annotations' + 'instances_train2017.json'), model=MaskFormer( - input_size=[1333, 1333, 3], + input_size=[640, 640, 3], norm_activation=common.NormActivation()), losses=Losses(), train_data=DataConfig( - input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'), + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/train*'), is_training=True, global_batch_size=train_batch_size, shuffle_buffer_size=1000, + parser = Parser( + output_size = [640,640], + min_scale = 0.3, + aspect_ratio_range = (0.5, 2.0), + min_overlap_params = (0.0, 1.4, 0.2, 0.1), + max_retry = 50, + pad_output = False, + resize_eval_groundtruth = True, + groundtruth_padded_size = None, + ignore_label = 0, + aug_rand_hflip = True, + aug_scale_min = 1.0, + aug_scale_max = 1.0, + color_aug_ssd = False, + brightness = 0.2, + saturation = 0.3, + contrast = 0.5, + # TODO choose appropriate augmentation + aug_type = None, + sigma = 8.0, + small_instance_area_threshold = 4096, + small_instance_weight = 3.0, + dtype = 'bfloat16', + seed = 2045, + ) ), validation_data=DataConfig( - input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), + input_path=os.path.join(COCO_INPUT_PATH_BASE, 'tfrecords/val*'), is_training=False, global_batch_size=eval_batch_size, drop_remainder=False, + parser = Parser( + output_size = [640,640], + pad_output = True, + seed = 4096, + ) + )), trainer=cfg.TrainerConfig( train_steps=train_steps, @@ -149,10 +189,10 @@ def maskformer_coco_panoptic() -> cfg.ExperimentConfig: steps_per_loop=steps_per_epoch, summary_interval=steps_per_epoch, checkpoint_interval=steps_per_epoch, - validation_interval=5 * steps_per_epoch, + validation_interval= 5 * steps_per_epoch, max_to_keep=1, best_checkpoint_export_subdir='best_ckpt', - best_checkpoint_eval_metric='AP', + # TODO: Not defined the metric optimizer_config=optimization.OptimizationConfig({ 'optimizer': { 'type': 'detr_adamw', diff --git a/models/official/projects/maskformer/data/create_coco_tf_record.py b/models/official/projects/maskformer/data/create_coco_tf_record.py new file mode 100644 index 00000000..07ca51f3 --- /dev/null +++ b/models/official/projects/maskformer/data/create_coco_tf_record.py @@ -0,0 +1,742 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r"""Convert raw COCO dataset to TFRecord format. + +This scripts follows the label map decoder format and supports detection +boxes, instance masks and captions. 
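+It can also write COCO panoptic category and instance masks when
+--include_panoptic_masks, --panoptic_annotations_file and --panoptic_masks_dir
+are supplied (see the panoptic flags defined below).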
+ +Example usage: + python create_coco_tf_record.py --logtostderr \ + --image_dir="${TRAIN_IMAGE_DIR}" \ + --image_info_file="${TRAIN_IMAGE_INFO_FILE}" \ + --object_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ + --caption_annotations_file="${CAPTION_ANNOTATIONS_FILE}" \ + --output_file_prefix="${OUTPUT_DIR/FILE_PREFIX}" \ + --num_shards=100 +""" +import collections +import json +import logging +import os +from absl import app # pylint:disable=unused-import +from absl import flags +import numpy as np + +from pycocotools import mask +import tensorflow as tf +from tqdm import tqdm +import multiprocessing as mp +from official.vision.data import tfrecord_lib + + +flags.DEFINE_boolean( + 'include_masks', False, 'Whether to include instance segmentations masks ' + '(PNG encoded) in the result. default: False.') +flags.DEFINE_multi_string('image_dir', '', 'Directory containing images.') +flags.DEFINE_string( + 'image_info_file', '', 'File containing image information. ' + 'Tf Examples in the output files correspond to the image ' + 'info entries in this file. If this file is not provided ' + 'object_annotations_file is used if present. Otherwise, ' + 'caption_annotations_file is used to get image info.') +flags.DEFINE_string( + 'object_annotations_file', '', 'File containing object ' + 'annotations - boxes and instance masks.') +flags.DEFINE_string('caption_annotations_file', '', 'File containing image ' + 'captions.') +flags.DEFINE_string('panoptic_annotations_file', '', 'File containing panoptic ' + 'annotations.') +flags.DEFINE_string('panoptic_masks_dir', '', + 'Directory containing panoptic masks annotations.') +flags.DEFINE_boolean( + 'include_panoptic_masks', False, 'Whether to include category and ' + 'instance masks in the result. These are required to run the PQ evaluator ' + 'default: False.') +flags.DEFINE_boolean( + 'panoptic_skip_crowd', False, 'Whether to skip crowd or not for panoptic ' + 'annotations. default: False.') +flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file') +flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.') +_NUM_PROCESSES = flags.DEFINE_integer( + 'num_processes', None, + ('Number of parallel processes to use. 
' + 'If set to 0, disables multi-processing.')) + + +FLAGS = flags.FLAGS + +logger = tf.get_logger() +logger.setLevel(logging.INFO) + +_VOID_LABEL = 0 +_VOID_INSTANCE_ID = 0 +_THING_CLASS_ID = 1 +_STUFF_CLASSES_OFFSET = 90 + +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, 
"name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 
195], "isthing": 0, "id": 144, "name": "platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +def coco_segmentation_to_mask_png(segmentation, height, width, is_crowd): + """Encode a COCO mask segmentation as PNG string.""" + run_len_encoding = mask.frPyObjects(segmentation, height, width) + binary_mask = mask.decode(run_len_encoding) + if not is_crowd: + binary_mask = np.amax(binary_mask, axis=2) + + return tfrecord_lib.encode_mask_as_png(binary_mask) + + +def generate_coco_panoptics_masks(segments_info, mask_path, + include_panoptic_masks, + is_category_thing): + """Creates masks for panoptic segmentation task. + + Args: + segments_info: a list of dicts, where each dict has keys: [u'id', + u'category_id', u'area', u'bbox', u'iscrowd'], detailing information for + each segment in the panoptic mask. 
+ mask_path: path to the panoptic mask. + include_panoptic_masks: bool, when set to True, category and instance + masks are included in the outputs. Set this to True, when using + the Panoptic Quality evaluator. + is_category_thing: a dict with category ids as keys and, 0/1 as values to + represent "stuff" and "things" classes respectively. + + Returns: + A dict with keys: [u'semantic_segmentation_mask', u'category_mask', + u'instance_mask']. The dict contains 'category_mask' and 'instance_mask' + only if `include_panoptic_eval_masks` is set to True. + """ + rgb_mask = tfrecord_lib.read_image(mask_path) + r, g, b = np.split(rgb_mask, 3, axis=-1) + + # decode rgb encoded panoptic mask to get segments ids + # refer https://cocodataset.org/#format-data + segments_encoded_mask = (r + g * 256 + b * (256**2)).squeeze() + + # create contiguous ids for segments + _meta = {} + + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + else: + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + _meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + _meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + # All required masks + semantic_segmentation_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_LABEL + if include_panoptic_masks: + category_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_LABEL + instance_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_INSTANCE_ID + contiguous_id_mask = np.ones_like( + segments_encoded_mask, dtype=np.uint8) * _VOID_INSTANCE_ID + + class_ids = [] + + for idx, segment in enumerate(segments_info): + segment_id = segment['id'] + category_id = segment['category_id'] + is_crowd = segment['iscrowd'] + + if category_id in _meta["thing_dataset_id_to_contiguous_id"]: + contiguous_id = _meta["thing_dataset_id_to_contiguous_id"][category_id] + else: + contiguous_id = _meta["stuff_dataset_id_to_contiguous_id"][category_id] + + if FLAGS.panoptic_skip_crowd and is_crowd: + continue + + if is_category_thing[category_id]: + # This for thing + encoded_category_id = _THING_CLASS_ID + instance_id = idx + 1 + else: + # This is for stuff (for stuff no instance id) + encoded_category_id = category_id - _STUFF_CLASSES_OFFSET + instance_id = _VOID_INSTANCE_ID + + segment_mask = (segments_encoded_mask == segment_id) + + semantic_segmentation_mask[segment_mask] = encoded_category_id + contiguous_id_mask[segment_mask] = contiguous_id + if include_panoptic_masks: + category_mask[segment_mask] = category_id + instance_mask[segment_mask] = instance_id + if not is_crowd: + class_ids.append(contiguous_id) + + + + outputs = { + 'semantic_segmentation_mask': tfrecord_lib.encode_mask_as_png( + semantic_segmentation_mask) + } + print("class_ids_raw : ", class_ids) + if include_panoptic_masks: + outputs.update({ + 'category_mask': tfrecord_lib.encode_mask_as_png(category_mask), + 'instance_mask': tfrecord_lib.encode_mask_as_png(instance_mask), + 'class_ids': class_ids, + 'contiguous_id_mask': tfrecord_lib.encode_mask_as_png(contiguous_id_mask), + }) + return outputs + + +def coco_annotations_to_lists(bbox_annotations, id_to_name_map, + image_height, image_width, include_masks): + """Converts COCO annotations to feature lists.""" + + data = dict((k, list()) for k in + ['xmin', 'xmax', 'ymin', 'ymax', 'is_crowd', + 'category_id', 'category_names', 
'area']) + if include_masks: + data['encoded_mask_png'] = [] + + num_annotations_skipped = 0 + + for object_annotations in bbox_annotations: + (x, y, width, height) = tuple(object_annotations['bbox']) + + if width <= 0 or height <= 0: + num_annotations_skipped += 1 + continue + if x + width > image_width or y + height > image_height: + num_annotations_skipped += 1 + continue + data['xmin'].append(float(x) / image_width) + data['xmax'].append(float(x + width) / image_width) + data['ymin'].append(float(y) / image_height) + data['ymax'].append(float(y + height) / image_height) + data['is_crowd'].append(object_annotations['iscrowd']) + category_id = int(object_annotations['category_id']) + data['category_id'].append(category_id) + data['category_names'].append(id_to_name_map[category_id].encode('utf8')) + data['area'].append(object_annotations['area']) + + if include_masks: + data['encoded_mask_png'].append( + coco_segmentation_to_mask_png(object_annotations['segmentation'], + image_height, image_width, + object_annotations['iscrowd']) + ) + + return data, num_annotations_skipped + + +def bbox_annotations_to_feature_dict( + bbox_annotations, image_height, image_width, id_to_name_map, include_masks): + """Convert COCO annotations to an encoded feature dict.""" + + data, num_skipped = coco_annotations_to_lists( + bbox_annotations, id_to_name_map, image_height, image_width, + include_masks) + feature_dict = {} + if len(bbox_annotations) != num_skipped: + feature_dict = { + 'image/object/bbox/xmin': tfrecord_lib.convert_to_feature(data['xmin']), + 'image/object/bbox/xmax': tfrecord_lib.convert_to_feature(data['xmax']), + 'image/object/bbox/ymin': tfrecord_lib.convert_to_feature(data['ymin']), + 'image/object/bbox/ymax': tfrecord_lib.convert_to_feature(data['ymax']), + 'image/object/class/text': tfrecord_lib.convert_to_feature( + data['category_names'] + ), + 'image/object/class/label': tfrecord_lib.convert_to_feature( + data['category_id'] + ), + 'image/object/is_crowd': tfrecord_lib.convert_to_feature( + data['is_crowd'] + ), + 'image/object/area': tfrecord_lib.convert_to_feature( + data['area'], 'float_list' + ), + } + if include_masks: + feature_dict['image/object/mask'] = tfrecord_lib.convert_to_feature( + data['encoded_mask_png'] + ) + + return feature_dict, num_skipped + + +def encode_caption_annotations(caption_annotations): + captions = [] + for caption_annotation in caption_annotations: + captions.append(caption_annotation['caption'].encode('utf8')) + + return captions + + +def create_tf_example(image, + image_dirs, + panoptic_masks_dir=None, + bbox_annotations=None, + id_to_name_map=None, + caption_annotations=None, + panoptic_annotation=None, + is_category_thing=None, + include_panoptic_masks=False, + include_masks=False): + """Converts image and annotations to a tf.Example proto. + + Args: + image: dict with keys: [u'license', u'file_name', u'coco_url', u'height', + u'width', u'date_captured', u'flickr_url', u'id'] + image_dirs: list of directories containing the image files. + panoptic_masks_dir: `str` of the panoptic masks directory. + bbox_annotations: + list of dicts with keys: [u'segmentation', u'area', u'iscrowd', + u'image_id', u'bbox', u'category_id', u'id'] Notice that bounding box + coordinates in the official COCO dataset are given as [x, y, width, + height] tuples using absolute coordinates where x, y represent the + top-left (0-indexed) corner. 
This function converts to the format + expected by the Tensorflow Object Detection API (which is which is + [ymin, xmin, ymax, xmax] with coordinates normalized relative to image + size). + id_to_name_map: a dict mapping category IDs to string names. + caption_annotations: + list of dict with keys: [u'id', u'image_id', u'str']. + panoptic_annotation: dict with keys: [u'image_id', u'file_name', + u'segments_info']. Where the value for segments_info is a list of dicts, + with each dict containing information for a single segment in the mask. + is_category_thing: `bool`, whether it is a category thing. + include_panoptic_masks: `bool`, whether to include panoptic masks. + include_masks: Whether to include instance segmentations masks + (PNG encoded) in the result. default: False. + + Returns: + example: The converted tf.Example + num_annotations_skipped: Number of (invalid) annotations that were ignored. + + Raises: + ValueError: if the image pointed to by data['filename'] is not a valid JPEG, + does not exist, or is not unique across image directories. + """ + image_height = image['height'] + image_width = image['width'] + filename = image['file_name'] + image_id = image['id'] + + if len(image_dirs) > 1: + full_paths = [os.path.join(image_dir, filename) for image_dir in image_dirs] + full_existing_paths = [p for p in full_paths if tf.io.gfile.exists(p)] + if not full_existing_paths: + raise ValueError( + '{} does not exist across image directories.'.format(filename)) + if len(full_existing_paths) > 1: + raise ValueError( + '{} is not unique across image directories'.format(filename)) + full_path, = full_existing_paths + # If there is only one image directory, it's not worth checking for existence, + # since trying to open the file will raise an informative error message if it + # does not exist. 
+ else: + image_dir, = image_dirs + full_path = os.path.join(image_dir, filename) + + with tf.io.gfile.GFile(full_path, 'rb') as fid: + encoded_jpg = fid.read() + + feature_dict = tfrecord_lib.image_info_to_feature_dict( + image_height, image_width, filename, image_id, encoded_jpg, 'jpg') + + num_annotations_skipped = 0 + if bbox_annotations: + box_feature_dict, num_skipped = bbox_annotations_to_feature_dict( + bbox_annotations, image_height, image_width, id_to_name_map, + include_masks) + num_annotations_skipped += num_skipped + feature_dict.update(box_feature_dict) + + if caption_annotations: + encoded_captions = encode_caption_annotations(caption_annotations) + feature_dict.update( + {'image/caption': tfrecord_lib.convert_to_feature(encoded_captions)}) + + if panoptic_annotation: + segments_info = panoptic_annotation['segments_info'] + + panoptic_mask_filename = os.path.join( + panoptic_masks_dir, + panoptic_annotation['file_name']) + encoded_panoptic_masks = generate_coco_panoptics_masks( + segments_info, panoptic_mask_filename, include_panoptic_masks, + is_category_thing) + feature_dict.update( + {'image/segmentation/class/encoded': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['semantic_segmentation_mask'])}) + print("Encoded panoptic class ids :", encoded_panoptic_masks['class_ids']) + if include_panoptic_masks: + feature_dict.update({ + 'image/panoptic/category_mask': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['category_mask']), + 'image/panoptic/instance_mask': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['instance_mask']), + 'image/panoptic/class_ids': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['class_ids'], value_type="int64_list"), + 'image/panoptic/contiguous_mask': tfrecord_lib.convert_to_feature( + encoded_panoptic_masks['contiguous_id_mask']) + }) + + example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) + return example, num_annotations_skipped + + +def _load_object_annotations(object_annotations_file): + """Loads object annotation JSON file.""" + with tf.io.gfile.GFile(object_annotations_file, 'r') as fid: + obj_annotations = json.load(fid) + + images = obj_annotations['images'] + id_to_name_map = dict((element['id'], element['name']) for element in + obj_annotations['categories']) + + img_to_obj_annotation = collections.defaultdict(list) + logging.info('Building bounding box index.') + for annotation in obj_annotations['annotations']: + image_id = annotation['image_id'] + img_to_obj_annotation[image_id].append(annotation) + + missing_annotation_count = 0 + for image in images: + image_id = image['id'] + if image_id not in img_to_obj_annotation: + missing_annotation_count += 1 + + logging.info('%d images are missing bboxes.', missing_annotation_count) + + return img_to_obj_annotation, id_to_name_map + + +def _load_caption_annotations(caption_annotations_file): + """Loads caption annotation JSON file.""" + with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid: + caption_annotations = json.load(fid) + + img_to_caption_annotation = collections.defaultdict(list) + logging.info('Building caption index.') + for annotation in caption_annotations['annotations']: + image_id = annotation['image_id'] + img_to_caption_annotation[image_id].append(annotation) + + missing_annotation_count = 0 + images = caption_annotations['images'] + for image in images: + image_id = image['id'] + if image_id not in img_to_caption_annotation: + missing_annotation_count += 1 + + logging.info('%d images are missing 
captions.', missing_annotation_count) + + return img_to_caption_annotation + + +def _load_panoptic_annotations(panoptic_annotations_file): + """Loads panoptic annotation from file.""" + with tf.io.gfile.GFile(panoptic_annotations_file, 'r') as fid: + panoptic_annotations = json.load(fid) + + img_to_panoptic_annotation = dict() + logging.info('Building panoptic index.') + for annotation in panoptic_annotations['annotations']: + image_id = annotation['image_id'] + img_to_panoptic_annotation[image_id] = annotation + + is_category_thing = dict() + for category_info in panoptic_annotations['categories']: + is_category_thing[category_info['id']] = category_info['isthing'] == 1 + + missing_annotation_count = 0 + images = panoptic_annotations['images'] + for image in images: + image_id = image['id'] + if image_id not in img_to_panoptic_annotation: + missing_annotation_count += 1 + logging.info( + '%d images are missing panoptic annotations.', missing_annotation_count) + + return img_to_panoptic_annotation, is_category_thing + + +def _load_images_info(images_info_file): + with tf.io.gfile.GFile(images_info_file, 'r') as fid: + info_dict = json.load(fid) + return info_dict['images'] + + +def generate_annotations(images, image_dirs, + panoptic_masks_dir=None, + img_to_obj_annotation=None, + img_to_caption_annotation=None, + img_to_panoptic_annotation=None, + is_category_thing=None, + id_to_name_map=None, + include_panoptic_masks=False, + include_masks=False): + """Generator for COCO annotations.""" + + for image in images: + object_annotation = (img_to_obj_annotation.get(image['id'], None) if + img_to_obj_annotation else None) + + caption_annotaion = (img_to_caption_annotation.get(image['id'], None) if + img_to_caption_annotation else None) + + panoptic_annotation = (img_to_panoptic_annotation.get(image['id'], None) if + img_to_panoptic_annotation else None) + yield (image, image_dirs, panoptic_masks_dir, object_annotation, + id_to_name_map, caption_annotaion, panoptic_annotation, + is_category_thing, include_panoptic_masks, include_masks) + + +def _create_tf_record_from_coco_annotations(images_info_file, + image_dirs, + output_path, + num_shards, + object_annotations_file=None, + caption_annotations_file=None, + panoptic_masks_dir=None, + panoptic_annotations_file=None, + include_panoptic_masks=False, + include_masks=False): + """Loads COCO annotation json files and converts to tf.Record format. + + Args: + images_info_file: JSON file containing image info. The number of tf.Examples + in the output tf Record files is exactly equal to the number of image info + entries in this file. This can be any of train/val/test annotation json + files Eg. 'image_info_test-dev2017.json', + 'instance_annotations_train2017.json', + 'caption_annotations_train2017.json', etc. + image_dirs: List of directories containing the image files. + output_path: Path to output tf.Record file. + num_shards: Number of output files to create. + object_annotations_file: JSON file containing bounding box annotations. + caption_annotations_file: JSON file containing caption annotations. + panoptic_masks_dir: Directory containing panoptic masks. + panoptic_annotations_file: JSON file containing panoptic annotations. + include_panoptic_masks: Whether to include 'category_mask' + and 'instance_mask', which is required by the panoptic quality evaluator. + include_masks: Whether to include instance segmentations masks + (PNG encoded) in the result. default: False. 
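+
+  The examples are written as `num_shards` sharded TFRecord files under the
+  `output_path` prefix via tfrecord_lib.write_tf_record_dataset.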
+ """ + + logging.info('writing to output path: %s', output_path) + + images = _load_images_info(images_info_file) + + img_to_obj_annotation = None + img_to_caption_annotation = None + id_to_name_map = None + img_to_panoptic_annotation = None + is_category_thing = None + if object_annotations_file: + img_to_obj_annotation, id_to_name_map = ( + _load_object_annotations(object_annotations_file)) + if caption_annotations_file: + img_to_caption_annotation = ( + _load_caption_annotations(caption_annotations_file)) + if panoptic_annotations_file: + img_to_panoptic_annotation, is_category_thing = ( + _load_panoptic_annotations(panoptic_annotations_file)) + + coco_annotations_iter = generate_annotations( + images=images, + image_dirs=image_dirs, + panoptic_masks_dir=panoptic_masks_dir, + img_to_obj_annotation=img_to_obj_annotation, + img_to_caption_annotation=img_to_caption_annotation, + img_to_panoptic_annotation=img_to_panoptic_annotation, + is_category_thing=is_category_thing, + id_to_name_map=id_to_name_map, + include_panoptic_masks=include_panoptic_masks, + include_masks=include_masks) + + num_skipped = tfrecord_lib.write_tf_record_dataset( + output_path, coco_annotations_iter, create_tf_example, num_shards, + multiple_processes=_NUM_PROCESSES.value) + + logging.info('Finished writing, skipped %d annotations.', num_skipped) + + +def main(_): + assert FLAGS.image_dir, '`image_dir` missing.' + assert (FLAGS.image_info_file or FLAGS.object_annotations_file or + FLAGS.caption_annotations_file), ('All annotation files are ' + 'missing.') + if FLAGS.image_info_file: + images_info_file = FLAGS.image_info_file + elif FLAGS.object_annotations_file: + images_info_file = FLAGS.object_annotations_file + else: + images_info_file = FLAGS.caption_annotations_file + + directory = os.path.dirname(FLAGS.output_file_prefix) + if not tf.io.gfile.isdir(directory): + tf.io.gfile.makedirs(directory) + + _create_tf_record_from_coco_annotations(images_info_file, FLAGS.image_dir, + FLAGS.output_file_prefix, + FLAGS.num_shards, + FLAGS.object_annotations_file, + FLAGS.caption_annotations_file, + FLAGS.panoptic_masks_dir, + FLAGS.panoptic_annotations_file, + FLAGS.include_panoptic_masks, + FLAGS.include_masks) + + +if __name__ == '__main__': + app.run(main) diff --git a/models/official/projects/maskformer/data/create_tf_records.sh b/models/official/projects/maskformer/data/create_tf_records.sh index 2dd62a93..d98d8906 100755 --- a/models/official/projects/maskformer/data/create_tf_records.sh +++ b/models/official/projects/maskformer/data/create_tf_records.sh @@ -1,9 +1,9 @@ -DATA_DIR=$1 +# DATA_DIR=$1 # wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -P $DATA_DIR # wget http://images.cocodataset.org/zips/val2017.zip -P $DATA_DIR # wget http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip -P $DATA_DIR # wget http://images.cocodataset.org/zips/train2017.zip -P $DATA_DIR -http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip + # downloaded_panoptic=true # run_dir=$(pwd) # if ! 
cd "$DATA_DIR"; then @@ -35,7 +35,7 @@ http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip # else # $($GET_TRAIN) # fi -cd /depot/qqiu/data/vishal/projects/tf_maskformer_integration/ +# # cd /depot/qqiu/data/vishal/projects/tf-maskformer/models # unzip $DATA_DIR/"*".zip -d $DATA_DIR # mkdir $DATA_DIR/zips @@ -46,7 +46,7 @@ cd /depot/qqiu/data/vishal/projects/tf_maskformer_integration/ # unzip $DATA_DIR/annotations/panoptic_val2017.zip -d $DATA_DIR # fi -python3 official/vision/data/create_coco_tf_record.py \ +python3 create_coco_tf_record.py \ --logtostderr \ --image_dir="$DATA_DIR/val2017" \ --object_annotations_file="$DATA_DIR/annotations/instances_val2017.json" \ @@ -54,16 +54,15 @@ python3 official/vision/data/create_coco_tf_record.py \ --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_val2017.json" \ --panoptic_masks_dir="$DATA_DIR/panoptic_val2017" \ --num_shards=8 \ - --include_masks \ --include_panoptic_masks -python3 official/vision/data/create_coco_tf_record.py \ - --logtostderr \ - --image_dir="$DATA_DIR/train2017" \ - --object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \ - --output_file_prefix="$DATA_DIR/tfrecords/train" \ - --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \ - --panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \ - --num_shards=150 \ - --include_masks \ - --include_panoptic_masks +# python3 create_coco_tf_record.py \ +# --logtostderr \ +# --image_dir="$DATA_DIR/train2017" \ +# --object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \ +# --output_file_prefix="$DATA_DIR/tfrecords/train" \ +# --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \ +# --panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \ +# --num_shards=150 \ +# --include_panoptic_masks\ +# --num_processes 16\ diff --git a/models/official/projects/maskformer/data/wget-log b/models/official/projects/maskformer/data/wget-log index b2a28873..26e4ae51 100644 --- a/models/official/projects/maskformer/data/wget-log +++ b/models/official/projects/maskformer/data/wget-log @@ -1,11 +1,11 @@ ---2023-03-23 20:30:42-- http://images.cocodataset.org/zips/val2017.zip -Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.226.97, 3.5.1.162, 52.217.224.49, ... -Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.226.97|:80... connected. +--2023-06-18 01:02:28-- http://images.cocodataset.org/annotations/annotations_trainval2017.zip +Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.49.41, 52.217.234.249, 3.5.25.137, ... +Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.49.41|:80... connected. HTTP request sent, awaiting response... 
200 OK -Length: 815585330 (778M) [application/zip] -Saving to: ‘/depot/davisjam/data/vishal/datasets/coco/val2017.zip’ +Length: 252907541 (241M) [application/zip] +Saving to: ‘/depot/davisjam/data/vishal/datasets/coco/annotations_trainval2017.zip’ - 58% [==============================================================> ] 480,496,948 68.0MB/s eta 6s 60% [===============================================================> ] 494,895,868 68.0MB/s eta 5s 62% [=================================================================> ] 509,051,732 68.0MB/s eta 5s 64% [===================================================================> ] 523,434,268 68.3MB/s eta 5s 65% [=====================================================================> ] 537,768,328 68.4MB/s eta 5s 67% [=======================================================================> ] 552,232,108 68.5MB/s eta 5s 69% [=========================================================================> ] 566,760,748 68.4MB/s eta 4s 71% [===========================================================================> ] 581,224,528 68.5MB/s eta 4s 73% [=============================================================================> ] 595,631,640 68.5MB/s eta 4s 74% [===============================================================================> ] 610,022,368 68.6MB/s eta 4s 76% [================================================================================> ] 624,226,708 68.4MB/s eta 4s 78% [==================================================================================> ] 638,495,908 68.4MB/s eta 3s 80% [====================================================================================> ] 652,765,108 68.4MB/s eta 3s 81% [======================================================================================> ] 667,293,748 68.5MB/s eta 3s 83% [========================================================================================> ] 681,845,448 68.5MB/s eta 3s 85% [==========================================================================================> ] 696,415,888 68.5MB/s eta 3s 87% [============================================================================================> ] 710,879,668 68.6MB/s eta 1s 88% [==============================================================================================> ] 725,278,588 68.5MB/s eta 1s 90% [================================================================================================> ] 739,462,316 68.5MB/s eta 1s 92% [=================================================================================================> ] 753,752,128 68.4MB/s eta 1s 94% [===================================================================================================> ] 768,086,188 68.5MB/s eta 1s 95% [=====================================================================================================> ] 782,614,828 68.4MB/s eta 0s 97% [=======================================================================================================> ] 797,143,468 68.5MB/s eta 0s 99% [=========================================================================================================> ] 811,477,528 68.5MB/s eta 0s 100%[==========================================================================================================>] 815,585,330 68.4MB/s in 12s + 91% [==========================================================================================================================> ] 232,219,579 34.9MB/s eta 2s 94% 
[==============================================================================================================================> ] 240,023,479 35.0MB/s eta 2s 97% [==================================================================================================================================> ] 247,806,679 35.1MB/s eta 0s 100%[=====================================================================================================================================>] 252,907,541 35.3MB/s in 8.6s -2023-03-23 20:30:54 (67.6 MB/s) - ‘/depot/davisjam/data/vishal/datasets/coco/val2017.zip’ saved [815585330/815585330] +2023-06-18 01:02:37 (28.0 MB/s) - ‘/depot/davisjam/data/vishal/datasets/coco/annotations_trainval2017.zip’ saved [252907541/252907541] diff --git a/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc b/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc index 289843bc..367229f8 100644 Binary files a/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc and b/models/official/projects/maskformer/dataloaders/__pycache__/input_reader.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc b/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc index b763aa0b..ff75046c 100644 Binary files a/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc and b/models/official/projects/maskformer/dataloaders/__pycache__/panoptic_input.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/dataloaders/factory.py b/models/official/projects/maskformer/dataloaders/factory.py index 187d54f5..0389e273 100644 --- a/models/official/projects/maskformer/dataloaders/factory.py +++ b/models/official/projects/maskformer/dataloaders/factory.py @@ -54,4 +54,4 @@ def parser_generator(params, mode): else: raise ValueError('Parser %s is not supported.' % params.architecture.parser) - return parser_fn + return parser_fn \ No newline at end of file diff --git a/models/official/projects/maskformer/dataloaders/input_reader.py b/models/official/projects/maskformer/dataloaders/input_reader.py index 8bd35c0d..2da163c0 100644 --- a/models/official/projects/maskformer/dataloaders/input_reader.py +++ b/models/official/projects/maskformer/dataloaders/input_reader.py @@ -14,27 +14,20 @@ """Data loader and input processing.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from official.core import config_definitions as cfg from typing import Optional, Text import tensorflow as tf -# import factory -# from official.projects.configs import mode_keys as ModeKeys -from official.modeling.hyperparams import params_dict class InputFn(object): """Input function that creates dataset from files.""" def __init__(self, - file_pattern: Text, - params: params_dict.ParamsDict, - mode: Text, - batch_size: int, + params: cfg.DataConfig, + dataset_fn, + parser_fn, num_examples: Optional[int] = -1): """Initialize. - Args: file_pattern: the file pattern for the data example (TFRecords). params: the parameter object for constructing example parser and model. @@ -44,16 +37,18 @@ def __init__(self, tf.errors.OutOfRangeError after that. If non-positive, it will be ignored. 
""" - assert file_pattern is not None - assert mode is not None - assert batch_size is not None - self._file_pattern = file_pattern - self._mode = mode - self._is_training = (mode == ModeKeys.TRAIN) - self._batch_size = batch_size + self._is_training = params.is_training + self._file_pattern = params.input_path + + self._batch_size = params.global_batch_size + self._shuffle_buffer_size = params.shuffle_buffer_size self._num_examples = num_examples - self._parser_fn = factory.parser_generator(params, mode) - self._dataset_fn = tf.data.TFRecordDataset + self._parser_fn = parser_fn + + self._dataset_fn = dataset_fn + if dataset_fn is None: + + self._dataset_fn = tf.data.TFRecordDataset self._input_sharding = (not self._is_training) try: @@ -66,24 +61,23 @@ def __init__(self, def __call__(self, ctx=None, batch_size: int = None): """Provides tf.data.Dataset object. - Args: ctx: context object. batch_size: expected batch size input data. - Returns: tf.data.Dataset object. """ if not batch_size: batch_size = self._batch_size assert batch_size is not None - dataset = tf.data.Dataset.list_files( - self._file_pattern, shuffle=self._is_training) - + dataset = tf.data.Dataset.list_files(self._file_pattern, + shuffle=self._is_training) + + if self._input_sharding and ctx and ctx.num_input_pipelines > 1: dataset = dataset.shard(ctx.num_input_pipelines, ctx.input_pipeline_id) dataset = dataset.cache() - + if self._is_training: dataset = dataset.repeat() @@ -91,15 +85,17 @@ def __call__(self, ctx=None, batch_size: int = None): map_func=self._dataset_fn, cycle_length=32, num_parallel_calls=tf.data.experimental.AUTOTUNE) - + if self._is_training: - dataset = dataset.shuffle(1000) + dataset = dataset.shuffle(self._shuffle_buffer_size) if self._num_examples > 0: dataset = dataset.take(self._num_examples) - + # Parses the fetched records to input tensors for model function. 
+ dataset = dataset.map( self._parser_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE) + dataset = dataset.batch(batch_size, drop_remainder=True) dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) - return dataset + return dataset \ No newline at end of file diff --git a/models/official/projects/maskformer/dataloaders/panoptic_input.py b/models/official/projects/maskformer/dataloaders/panoptic_input.py index b9524967..a9c16bcd 100644 --- a/models/official/projects/maskformer/dataloaders/panoptic_input.py +++ b/models/official/projects/maskformer/dataloaders/panoptic_input.py @@ -18,14 +18,148 @@ import numpy as np import tensorflow as tf - -from official.vision.configs import common +from loguru import logger from official.vision.dataloaders import parser from official.vision.dataloaders import tf_example_decoder from official.vision.ops import augment from official.vision.ops import preprocess_ops -from official.projects.maskformer.dataloaders import input_reader -# from official.projects.maskformer.configs import mode_keys as ModeKeys +from official.core import config_definitions as cfg +tf.compat.v1.enable_eager_execution() +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 10], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": 
"kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], 
"isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, + {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, 
"id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] def _compute_gaussian_from_std(sigma): """Computes the Gaussian and its size from a given standard deviation.""" @@ -53,31 +187,49 @@ def __init__( regenerate_source_id=regenerate_source_id) self._panoptic_category_mask_key = panoptic_category_mask_key self._panoptic_instance_mask_key = panoptic_instance_mask_key - + self._panoptic_contigious_mask_key = 'image/panoptic/contiguous_mask' + self._class_ids_key = 'image/panoptic/class_ids' + self._image_height_key = 'image/height' + self._image_width_key = 'image/width' + self._image_key = "" self._panoptic_keys_to_features = { - panoptic_category_mask_key: + self._panoptic_category_mask_key: + tf.io.FixedLenFeature((), tf.string, default_value=''), + self._panoptic_instance_mask_key: + tf.io.FixedLenFeature((), tf.string, default_value=''), + self._panoptic_contigious_mask_key: tf.io.FixedLenFeature((), tf.string, default_value=''), - panoptic_instance_mask_key: - tf.io.FixedLenFeature((), tf.string, default_value='') + self._class_ids_key: + tf.io.VarLenFeature(tf.int64), } + def decode(self, serialized_example): decoded_tensors = super(TfExampleDecoder, self).decode(serialized_example) + + parsed_tensors = tf.io.parse_single_example( serialized_example, self._panoptic_keys_to_features) - - category_mask = tf.io.decode_image( + + category_mask = tf.io.decode_png( parsed_tensors[self._panoptic_category_mask_key], channels=1) - instance_mask = tf.io.decode_image( + instance_mask = tf.io.decode_png( parsed_tensors[self._panoptic_instance_mask_key], channels=1) + contigious_mask = tf.io.decode_png( + parsed_tensors[self._panoptic_contigious_mask_key], channels=1) + class_ids = parsed_tensors[self._class_ids_key] + category_mask.set_shape([None, None, 1]) instance_mask.set_shape([None, None, 1]) - + contigious_mask.set_shape([None, None, 1]) decoded_tensors.update({ 'groundtruth_panoptic_category_mask': category_mask, - 'groundtruth_panoptic_instance_mask': instance_mask + 'groundtruth_panoptic_instance_mask': instance_mask, + 'groundtruth_panoptic_contigious_mask': contigious_mask, + 'groundtruth_panoptic_class_ids': class_ids, }) + return decoded_tensors @@ -86,29 +238,10 @@ class mask_former_parser(parser.Parser): def __init__( self, - output_size: List[int] = None, - min_scale: float = 0.3, - aspect_ratio_range: List[float] = (0.5, 2.0), - min_overlap_params: List[float] = (0.0, 1.4, 0.2, 0.1), - max_retry: int = 50, - pad_output: bool = True, - resize_eval_groundtruth: bool = True, - groundtruth_padded_size: Optional[List[int]] = None, - ignore_label: int = 0, - aug_rand_hflip: bool = True, - aug_scale_min: float = 1.0, - aug_scale_max: float = 1.0, - color_aug_ssd: bool = False, - brightness: float = 0.2, - saturation: float = 0.3, - contrast: float = 0.5, - aug_type: Optional[common.Augmentation] = None, - sigma: float = 8.0, - small_instance_area_threshold: int = 4096, - small_instance_weight: float = 3.0, - dtype: str = 'float32', - seed: int = None, - mode: ModeKeys = None): + params: cfg.DataConfig, + decoder_fn = None, + is_training = False, + ): """Initializes parameters for parsing annotations in the dataset. 
Args: @@ -136,35 +269,42 @@ def __init__( """ # general settings + self._output_size = params.output_size + self._mask_null = 0 + self._dtype = params.dtype + self._pad_output = params.pad_output + self._seed = params.seed + # TODO + self._max_instances = 100 + self._decoder = decoder_fn - self._output_size = output_size - self._dtype = dtype - self._pad_output = pad_output - self._seed = seed - - self._decoder = TfExampleDecoder() + if self._pad_output == True and self._output_size is None: + raise Exception("Error: no output pad provided") + if self._decoder == None: + print("assuming default decoder") + self._decoder = TfExampleDecoder() - self._mode = mode - if mode == None: + self._is_training = is_training + if is_training == None: print("assuming training mode") - self._mode = ModeKeys.TRAIN + self._is_training = True - # Boxes: - self._resize_eval_groundtruth = resize_eval_groundtruth - if (not resize_eval_groundtruth) and (groundtruth_padded_size is None): + + self._resize_eval_groundtruth = params.resize_eval_groundtruth + if (not params.resize_eval_groundtruth) and (params.groundtruth_padded_size is None): raise ValueError( 'groundtruth_padded_size ([height, width]) needs to be' 'specified when resize_eval_groundtruth is False.') - self._groundtruth_padded_size = groundtruth_padded_size - self._ignore_label = ignore_label + self._groundtruth_padded_size = params.groundtruth_padded_size + self._ignore_label = params.ignore_label # Data augmentation - self._aug_rand_hflip = aug_rand_hflip - self._aug_scale_min = aug_scale_min - self._aug_scale_max = aug_scale_max + self._aug_rand_hflip = params.aug_rand_hflip + self._aug_scale_min = params.aug_scale_min + self._aug_scale_max = params.aug_scale_max # Auto Augment - if aug_type and aug_type.type: + if params.aug_type and aug_type.type: if aug_type.type == 'autoaug': self._augmenter = augment.AutoAugment( augmentation_name=aug_type.autoaug.augmentation_name, @@ -177,26 +317,27 @@ def __init__( self._augmenter = None #Cropping: - self._min_scale = min_scale - self._aspect_ratio_range = aspect_ratio_range - self._min_overlap_params = min_overlap_params - self._max_retry = max_retry + self._min_scale = params.min_scale + self._aspect_ratio_range = params.aspect_ratio_range + self._min_overlap_params = params.min_overlap_params + self._max_retry = params.max_retry # color augmentation - self._color_aug_ssd = color_aug_ssd - self._brightness = brightness - self._saturation = saturation - self._contrast = contrast + self._color_aug_ssd = params.color_aug_ssd + self._brightness = params.brightness + self._saturation = params.saturation + self._contrast = params.contrast - self._sigma = sigma + self._sigma = params.sigma self._gaussian, self._gaussian_size = _compute_gaussian_from_std( self._sigma) self._gaussian = tf.reshape(self._gaussian, shape=[-1]) - self._small_instance_area_threshold = small_instance_area_threshold - self._small_instance_weight = small_instance_weight + self._small_instance_area_threshold = params.small_instance_area_threshold + self._small_instance_weight = params.small_instance_weight + def _resize_and_crop_mask(self, mask, image_info, crop_dims, is_training): """Resizes and crops mask using `image_info` dict.""" @@ -205,15 +346,15 @@ def _resize_and_crop_mask(self, mask, image_info, crop_dims, is_training): offset = image_info[3, : ] im_height = int(image_info[0][0]) im_width = int(image_info[0][1]) - print(mask.shape) - print(im_height, im_width) + + # print(mask.shape) mask = tf.reshape(mask, shape=[1, im_height, 
im_width, 1]) - print(mask.shape) + # print(mask.shape) mask += 1 if is_training or self._resize_eval_groundtruth: - print("using image offset:",offset) + # print("using image offset:",offset) mask = preprocess_ops.resize_and_crop_masks( mask, image_scale, @@ -233,7 +374,8 @@ def _resize_and_crop_mask(self, mask, image_info, crop_dims, is_training): mask) mask = tf.squeeze(mask, axis=0) return mask - + + def _parse_data(self, data, is_training): image = data['image'] @@ -249,6 +391,10 @@ def _parse_data(self, data, is_training): instance_mask = tf.cast( data['groundtruth_panoptic_instance_mask'][:, :, 0], dtype=tf.float32) + contigious_mask = tf.cast(data['groundtruth_panoptic_contigious_mask'][:, :, 0], + dtype=tf.float32) + class_ids = tf.sparse.to_dense(data['groundtruth_panoptic_class_ids'], default_value=0) + class_ids = tf.cast(class_ids, dtype=tf.float32) # applies by pixel augmentation (saturation, brightness, contrast) if self._color_aug_ssd: @@ -261,8 +407,8 @@ def _parse_data(self, data, is_training): ) # Flips image randomly during training. if self._aug_rand_hflip and is_training: - print("doing random flip") - masks = tf.stack([category_mask, instance_mask], axis=0) + # print("doing random flip") + masks = tf.stack([category_mask, instance_mask, contigious_mask], axis=0) image, _, masks = preprocess_ops.random_horizontal_flip( image=image, masks=masks, @@ -270,17 +416,12 @@ def _parse_data(self, data, is_training): category_mask = masks[0] instance_mask = masks[1] - - - + contigious_mask = masks[2] # Resize and crops image. - print(category_mask.shape) - print(instance_mask.shape) - print(self._output_size) - masks = tf.stack([category_mask, instance_mask], axis=0) - masks = tf.expand_dims(masks, -1) - print("stacked masks:",masks.shape) + masks = tf.stack([category_mask, instance_mask, contigious_mask], axis=0) + masks = tf.expand_dims(masks, -1) + # Resizes and crops image. 
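# Editor's note (not part of the diff): the category, instance and contiguous
# masks are stacked into a single tensor before random_horizontal_flip and
# random_crop_image_masks so that one random geometric transform is applied to
# all three at once, keeping them pixel-aligned with the image.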
cropped_image, masks = preprocess_ops.random_crop_image_masks( img = image, @@ -295,24 +436,19 @@ def _parse_data(self, data, is_training): category_mask = tf.squeeze(masks[0]) instance_mask = tf.squeeze(masks[1]) + contigious_mask = tf.squeeze(masks[2]) - print("categorical shape:",category_mask.shape) - print("instance shape:",instance_mask.shape) - print("image shape:",cropped_image.shape) crop_im_size = tf.cast(tf.shape(cropped_image)[0:2], tf.int32) - - print("using padding:", self._output_size) - # resize and pad image from random crop + + # Resize image image, image_info = preprocess_ops.resize_and_crop_image( cropped_image, self._output_size if self._pad_output else crop_im_size, self._output_size if self._pad_output else crop_im_size, - aug_scale_min=self._aug_scale_min if self._pad_output or not self._mode == ModeKeys.TRAIN else 1.0, - aug_scale_max=self._aug_scale_max if self._pad_output or not self._mode == ModeKeys.TRAIN else 1.0) - - print("image info:", image_info) - # resize masks according to image + aug_scale_min=self._aug_scale_min if self._pad_output or not self._is_training else 1.0, + aug_scale_max=self._aug_scale_max if self._pad_output or not self._is_training else 1.0) + category_mask = self._resize_and_crop_mask( category_mask, image_info, @@ -323,34 +459,45 @@ def _parse_data(self, data, is_training): image_info, self._output_size if self._pad_output else crop_im_size, is_training=is_training) - (instance_centers_heatmap, - instance_centers_offset, - semantic_weights) = self._encode_centers_and_offets( - instance_mask=instance_mask[:, :, 0]) + contigious_mask = self._resize_and_crop_mask( + contigious_mask, + image_info, + self._output_size if self._pad_output else crop_im_size, + is_training=is_training) + + individual_masks = self._get_individual_masks( + class_ids=class_ids,contig_instance_mask=contigious_mask) + + + # Resize image and masks to output size. 
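# Editor's note (not part of the diff): the resizes below use method='nearest'
# so that integer category/instance ids are copied rather than interpolated;
# bilinear resizing would blend neighbouring ids into meaningless label values.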
+ image = tf.image.resize(image, self._output_size, method='nearest') + category_mask = tf.image.resize(category_mask, self._output_size, method='nearest') + instance_mask = tf.image.resize(instance_mask, self._output_size, method='nearest') + individual_masks = tf.image.resize(individual_masks, self._output_size, method='nearest') - # Cast image and labels as self._dtype + unique_ids = preprocess_ops.clip_or_pad_to_fixed_size( + class_ids, self._max_instances) + image = tf.cast(image, dtype=self._dtype) category_mask = tf.cast(category_mask, dtype=self._dtype) instance_mask = tf.cast(instance_mask, dtype=self._dtype) - instance_centers_heatmap = tf.cast( - instance_centers_heatmap, dtype=self._dtype) - instance_centers_offset = tf.cast( - instance_centers_offset, dtype=self._dtype) + individual_masks = tf.cast(individual_masks, dtype=self._dtype) + unique_ids = tf.cast(unique_ids, dtype=self._dtype) valid_mask = tf.not_equal( category_mask, self._ignore_label) things_mask = tf.not_equal( instance_mask, self._ignore_label) + labels = { 'category_mask': category_mask, 'instance_mask': instance_mask, - 'instance_centers_heatmap': instance_centers_heatmap, - 'instance_centers_offset': instance_centers_offset, - 'semantic_weights': semantic_weights, 'valid_mask': valid_mask, 'things_mask': things_mask, - 'image_info': image_info + 'image_info': image_info, + 'unique_ids': unique_ids, + 'individual_masks': individual_masks, } return image, labels @@ -362,112 +509,48 @@ def _parse_eval_data(self, data): """Parses data for evaluation.""" return self._parse_data(data=data, is_training=False) - def _encode_centers_and_offets(self, instance_mask): - """Generates center heatmaps and offets from instance id mask. - Args: - instance_mask: `tf.Tensor` of shape [height, width] representing - groundtruth instance id mask. 
- Returns: - instance_centers_heatmap: `tf.Tensor` of shape [height, width, 1] - instance_centers_offset: `tf.Tensor` of shape [height, width, 2] - """ - shape = tf.shape(instance_mask) - height, width = shape[0], shape[1] - - padding_start = int(3 * self._sigma + 1) - padding_end = int(3 * self._sigma + 2) - - # padding should be equal to self._gaussian_size which is calculated - # as size = int(6 * sigma + 3) - padding = padding_start + padding_end - - instance_centers_heatmap = tf.zeros( - shape=[height + padding, width + padding], - dtype=tf.float32) - centers_offset_y = tf.zeros( - shape=[height, width], - dtype=tf.float32) - centers_offset_x = tf.zeros( - shape=[height, width], - dtype=tf.float32) - semantic_weights = tf.ones( - shape=[height, width], - dtype=tf.float32) - - unique_instance_ids, _ = tf.unique(tf.reshape(instance_mask, [-1])) - - # The following method for encoding center heatmaps and offets is inspired - # by the reference implementation available at - # https://github.com/google-research/deeplab2/blob/main/data/sample_generator.py # pylint: disable=line-too-long - for instance_id in unique_instance_ids: - if instance_id == self._ignore_label: - continue - - mask = tf.equal(instance_mask, instance_id) - mask_area = tf.reduce_sum(tf.cast(mask, dtype=tf.float32)) - mask_indices = tf.cast(tf.where(mask), dtype=tf.float32) - mask_center = tf.reduce_mean(mask_indices, axis=0) - mask_center_y = tf.cast(tf.round(mask_center[0]), dtype=tf.int32) - mask_center_x = tf.cast(tf.round(mask_center[1]), dtype=tf.int32) - - if mask_area < self._small_instance_area_threshold: - semantic_weights = tf.where( - mask, - self._small_instance_weight, - semantic_weights) - - gaussian_size = self._gaussian_size - indices_y = tf.range(mask_center_y, mask_center_y + gaussian_size) - indices_x = tf.range(mask_center_x, mask_center_x + gaussian_size) - - indices = tf.stack(tf.meshgrid(indices_y, indices_x)) - indices = tf.reshape( - indices, shape=[2, gaussian_size * gaussian_size]) - indices = tf.transpose(indices) - - instance_centers_heatmap = tf.tensor_scatter_nd_max( - tensor=instance_centers_heatmap, - indices=indices, - updates=self._gaussian) - - centers_offset_y = tf.tensor_scatter_nd_update( - tensor=centers_offset_y, - indices=tf.cast(mask_indices, dtype=tf.int32), - updates=tf.cast(mask_center_y, dtype=tf.float32) - mask_indices[:, 0]) - - centers_offset_x = tf.tensor_scatter_nd_update( - tensor=centers_offset_x, - indices=tf.cast(mask_indices, dtype=tf.int32), - updates=tf.cast(mask_center_x, dtype=tf.float32) - mask_indices[:, 1]) - - instance_centers_heatmap = instance_centers_heatmap[ - padding_start:padding_start + height, - padding_start:padding_start + width] - instance_centers_heatmap = tf.expand_dims(instance_centers_heatmap, axis=-1) - - instance_centers_offset = tf.stack( - [centers_offset_y, centers_offset_x], - axis=-1) - - return (instance_centers_heatmap, - instance_centers_offset, - semantic_weights) + def _get_individual_masks(self, class_ids, contig_instance_mask): + + individual_mask_list = tf.TensorArray(tf.float32, size=self._max_instances) + counter = 0 + + for class_id in class_ids: + mask = tf.equal(contig_instance_mask, class_id) + individual_mask_list = individual_mask_list.write(counter, tf.cast(mask, tf.float32)) + counter += 1 + + for idx in tf.range(100-tf.size(class_ids)): + new_mask = tf.zeros(tf.shape(contig_instance_mask)) + individual_mask_list = individual_mask_list.write(counter, tf.cast(new_mask, tf.float32)) + + # individual_masks = 
tf.zeros([self._max_instances, self._output_size[0], self._output_size[1], 1], dtype=tf.float32) + # unique_instance_ids, _ = tf.unique(tf.reshape(instance_mask, [-1])) + # individual_mask_list = tf.TensorArray(tf.float32, size=100) + # counter = 0 + # for instance_id in unique_instance_ids: + + # mask = tf.equal(instance_mask, instance_id) + # individual_mask_list = individual_mask_list.write(counter, tf.expand_dims(tf.cast(mask, tf.float32), axis=2)) + # counter += 1 + + # return (unique_instance_ids, individual_mask_list.stack()) + # tf.zeros([self._max_instances, self._output_size[0], self._output_size[1], 1], dtype=tf.float32) + + return individual_mask_list.stack() def __call__(self, value): """Parses data to an image and associated training labels. Args: value: a string tensor holding a serialized tf.Example proto. Returns: - image, labels: if mode == ModeKeys.TRAIN. see _parse_train_data. - {'images': image, 'labels': labels}: if mode == ModeKeys.PREDICT - or ModeKeys.PREDICT_WITH_GT. + image, labels: if is_training, see _parse_train_data. + {'images': image, 'labels': labels}: if is_training """ - with tf.name_scope('parser'): - data = self._decoder.decode(value) - - if self._mode == ModeKeys.TRAIN: + data = self._decoder(value) + + if self._is_training: return self._parse_train_data(data) else: - return self._parse_eval_data(data) \ No newline at end of file + return self._parse_eval_data(data) diff --git a/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference-checkpoint.py b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference-checkpoint.py new file mode 100644 index 00000000..6b326e13 --- /dev/null +++ b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference-checkpoint.py @@ -0,0 +1,67 @@ +import tensorflow as tf + +class PanopticInference(): + def call(self, mask_true, mask_pred): + probs = tf.keras.activations.softmax(mask_true, axis=-1) + scores = tf.reduce_max(probs, axis=-1) + labels = tf.argmax(probs, axis=-1) + mask_pred = tf.keras.activations.sigmoid(mask_pred) + + config_num_classes = 171 + object_mask_threshold = 0.0 + keep = tf.math.logical_and(tf.math.not_equal(labels, config_num_classes), scores > object_mask_threshold) + curr_scores = scores[keep] + curr_classes = labels[keep] + + curr_masks = mask_pred[keep] + curr_mask_cls = mask_true[keep] + curr_mask_cls = tf.slice(curr_mask_cls, [0, 0], [-1, curr_mask_cls.shape[1] - 1]) + + curr_prob_masks = tf.reshape(curr_scores, [-1, 1, 1]) * curr_masks + + height, width = tf.shape(curr_masks)[-2:] + + with tf.device(curr_masks.device): + panoptic_seg = tf.zeros((height, width), dtype=tf.int32) + segments_info = [] + + current_segment_id = 0 + + if tf.shape(curr_masks)[0] == 0: + return panoptic_seg, segments_info + else: + curr_masks_ids = tf.argmax(curr_prob_masks, axis=0) + stuff_memory = {} + + for k in range(curr_classes.shape[0]): + pred_class = curr_classes[k].numpy() + # is_thing = pred_class in self.metadata.thing_dataset_id_to_contiguous_id.values() + is_thing = True # TODO(ibrahim): FIX when get configs. 
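# Editor's note (not part of the diff): in the reference MaskFormer-style
# panoptic inference, is_thing would come from dataset metadata (the "isthing"
# flag in the COCO category table earlier in this diff). The stuff_memory dict
# below merges all pixels of one stuff class into a single segment id, while
# each thing instance gets a fresh segment id; with is_thing hard-coded to
# True, that merging is effectively disabled until the config is wired in.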
+ + mask = curr_masks_ids == k + mask_area = tf.reduce_sum(mask).numpy() + original_area = tf.reduce_sum(curr_masks[k] >= 0.5).numpy() + + if mask_area > 0 and original_area > 0: + config_overlap_threshold = 0.8 + if mask_area / original_area < config_overlap_threshold: + continue + + if not is_thing: + if int(pred_class) in stuff_memory: + panoptic_seg[mask] = stuff_memory[int(pred_class)] + continue + else: + stuff_memory[int(pred_class)] = current_segment_id + 1 + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + + segments_info.append({ + "id": current_segment_id, + "is_thing": bool(is_thing), + "category_id": int(pred_class), + }) + + return panoptic_seg, segments_info + diff --git a/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference_test-checkpoint.py b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference_test-checkpoint.py new file mode 100644 index 00000000..afa1e412 --- /dev/null +++ b/models/official/projects/maskformer/losses/.ipynb_checkpoints/inference_test-checkpoint.py @@ -0,0 +1,30 @@ +from absl.testing import parameterized +import tensorflow as tf + +from official.projects.maskformer.losses.inference import PanopticInference +from official.projects.maskformer.maskformer import MaskFormer + +class PanopticInferenceTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters(('test1',)) + def test_pass_through(self): + model = MaskFormer() + + input_image = tf.ones((1, 640, 640, 3)) + + expected_class_probs_shape = [1, 100, 172] + expected_mask_probs_shape = [1, 160, 160, 100] + + output = model(input_image) + self.assertAllEqual( + output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) + self.assertAllEqual( + output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) + + out = PanopticInference().call(mask_true=output["class_prob_predictions"], mask_pred=output["mask_prob_predictions"]) + print(out) + + + + +if __name__ == '__main__': + tf.test.main() diff --git a/models/official/projects/maskformer/losses/.ipynb_checkpoints/maskformer_losses-checkpoint.py b/models/official/projects/maskformer/losses/.ipynb_checkpoints/maskformer_losses-checkpoint.py new file mode 100644 index 00000000..4bfe5edd --- /dev/null +++ b/models/official/projects/maskformer/losses/.ipynb_checkpoints/maskformer_losses-checkpoint.py @@ -0,0 +1,292 @@ +import tensorflow as tf +from official.vision.losses import focal_loss +from official.projects.detr.ops import matchers +from loguru import logger +tf.compat.v1.enable_eager_execution() + +def _max_by_axis(the_list): + all_max = the_list[0] + for sublist in the_list[1:]: + for idx, item in enumerate(sublist): + all_max[idx] = max(all_max[idx], item) + return all_max + +class NestedTensor(object): + def __init__(self, tensors, mask=None): + self.tensors = tf.convert_to_tensor(tensors) + self.mask = tf.convert_to_tensor(mask) if mask is not None else None + + def to(self, device): + with tf.device(device): + cast_tensor = tf.identity(self.tensors) + cast_mask = tf.identity(self.mask) if self.mask is not None else None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list): + if tf.rank(tensor_list[0]).numpy() == 3: + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + + dtype = 
tensor_list[0].dtype + device = tensor_list[0].device + + with tf.device(device): + tensor = tf.zeros(batch_shape, dtype=dtype) + mask = tf.ones((b, h, w), dtype=tf.bool) + + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img = tf.Variable(pad_img) + pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].assign(img) + pad_img = tf.convert_to_tensor(pad_img) + + m = tf.Variable(m) + false_tensor = tf.zeros((img.shape[1], img.shape[2]), dtype=tf.bool) + m[:img.shape[1], :img.shape[2]].assign(false_tensor) + m = tf.convert_to_tensor(m) + return NestedTensor(tensor, mask) + else: + raise ValueError("not supported") + + +class FocalLossMod(focal_loss.FocalLoss): + """Implements a Focal loss for segmentation problems. + Reference: + [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278). + """ + + def __init__(self, alpha=0.25, gamma=2): + """Initializes `FocalLoss`. + Args: + alpha: The `alpha` weight factor for binary class imbalance. + gamma: The `gamma` focusing parameter to re-weight loss. + reduction and name? + """ + super().__init__(alpha, gamma, reduction='none') + # self.background_indices = background_indices + + def call(self, y_true, y_pred): + """Invokes the `FocalLoss`. + Args: + y_true: A tensor of size [batch, num_anchors, num_classes]. + Stores the binary classification lavel for each element in y_pred. + y_pred: A tensor of size [batch, num_anchors, num_classes]. + The predictions of each example. + num_masks: The number of masks. + + Returns: + Loss float `Tensor`. + """ + # background_indices = tf.expand_dims(self.background_indices, axis=0) + weighted_loss = super().call(y_true, y_pred) + # print("weighted loss :", weighted_loss.shape) #(1, 100, 442368) + # mean over all pixels + loss = tf.math.reduce_mean(weighted_loss, axis=-1) + # logger.debug("loss shape: {}".format(loss.shape)) + # logger.debug("loss: {}".format(loss)) + return loss + + def batch(self, y_true, y_pred): + """ + y_true: (b_size, 100 (num objects), h*w) + y_pred: (b_size, 100 (num objects), h*w) + """ + hw = tf.cast(tf.shape(y_pred)[-1], dtype=tf.float32) #[100, h, w] + prob = tf.keras.activations.sigmoid(y_pred) + focal_pos = tf.pow(1 - prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_pred), logits=y_pred) + focal_neg = tf.pow(prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(y_pred), logits=y_pred) + + if self._alpha >= 0: + focal_pos = focal_pos * self._alpha + focal_neg = focal_neg * (1 - self._alpha) + loss = tf.einsum("bnc,bmc->bnm", focal_pos, y_true) + tf.einsum( + "bnc,bmc->bnm", focal_neg, (1 - y_true) + ) + return loss / hw + + + +class DiceLoss(tf.keras.losses.Loss): + + def __init__(self): + super().__init__(reduction='none') + + def call(self, y_true, y_pred): + """ + y_true: (b size, 100, h*w) + """ + + y_pred = tf.reshape(tf.keras.activations.sigmoid(y_pred), (y_pred.shape[0],y_pred.shape[1],-1)) + y_true = tf.reshape(y_true, (y_true.shape[0],tf.shape(y_true)[1],-1)) + + numerator = 2 * tf.reduce_sum(y_pred * y_true, axis=-1) + denominator = tf.reduce_sum(y_pred, axis=-1) + tf.reduce_sum(y_true, axis=-1) + loss = 1 - (numerator + 1) / (denominator + 1) + + return loss + + def batch(self, y_true, y_pred): + # y_pred = tf.keras.activations.sigmoid(y_pred) + y_pred = tf.sigmoid(y_pred) + y_pred = tf.reshape(y_pred, [y_pred.shape[0], -1, y_pred.shape[1]]) + y_pred = tf.transpose(y_pred, [0, 2, 1]) + + numerator = 2 * tf.einsum("bnc,bmc->bnm", y_pred, y_true) + 
denominator = tf.reduce_sum(y_pred, axis=-1)[:, tf.newaxis] + tf.expand_dims(tf.reduce_sum(y_true, axis=-1), axis=-1) + + + loss = 1 - (numerator + 1) / (denominator + 1) + + return loss + +class Loss: + def __init__(self, num_classes, matcher, eos_coef, cost_class = 1, cost_focal = 1, cost_dice = 1): + + self.num_classes = num_classes + self.matcher = matcher + self.eos_coef = eos_coef + self.cost_class = cost_class + self.cost_focal = cost_focal + self.cost_dice = cost_dice + + + def memory_efficient_matcher(self, outputs, y_true): + batch_size, num_queries = outputs["pred_logits"].shape[:2] + out_mask = outputs["pred_masks"] + out_mask = tf.transpose(out_mask, perm=[0,3,1,2]) + + tgt_ids = tf.cast(y_true["unique_ids"], dtype=tf.int64) + + with tf.device(out_mask.device): + tgt_mask = y_true["individual_masks"] + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) # [b, 100, h, w, 1] + cost_class = tf.gather(-tf.nn.softmax(outputs["pred_logits"]), tgt_ids, batch_dims=1, axis=-1) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) + + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) # [b, 100, h*w] + + cost_focal = FocalLossMod().batch(tgt_mask, out_mask) + cost_dice = DiceLoss().batch(tgt_mask, out_mask) + + total_cost = ( + self.cost_focal * cost_focal + + self.cost_class * cost_class + + self.cost_dice * cost_dice + ) + + _, inds = matchers.hungarian_matching(total_cost) + return inds + + + + def get_loss(self, batch_size, outputs, y_true, indices): + + target_index = tf.math.argmax(indices, axis=1) #[batchsize, 100] + target_labels = y_true["unique_ids"] #[batchsize, num_gt_objects] + cls_outputs = outputs["pred_logits"] # [batchsize, num_queries, num_classes] [1,100,134] + cls_masks = outputs["pred_masks"]# [batchsize, h, w, num_queries] + individual_masks = y_true["individual_masks"] # [batchsize, num_gt_objects, h, w, 1] + + + + cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1) + mask_assigned = tf.gather(cls_masks, target_index, batch_dims=1, axis=1) + + target_classes = tf.cast(target_labels, dtype=tf.int32) + background = tf.equal(target_classes, 0) # Pytorch padds 133 class number where classes are background + + num_masks = tf.reduce_sum(tf.cast(tf.logical_not(background), tf.float32), axis=-1) + ######################################################################################################## + # TODO: check if we need this! 
+ # if Utils.is_dist_avail_and_initialized(): + # num_masks = tf.distribute.get_strategy().reduce(tf.distribute.ReduceOp.SUM, num_masks, axis=None) + # num_masks = tf.maximum(num_masks / tf.distribute.get_strategy().num_replicas_in_sync, 1.0) + ######################################################################################################### + + xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_classes, logits=cls_assigned) + cls_loss = self.cost_class * tf.where(background, 0.1 * xentropy, xentropy) + cls_weights = tf.where(background, 0.1 * tf.ones_like(cls_loss), tf.ones_like(cls_loss)) + + num_masks_per_replica = tf.reduce_sum(num_masks) + cls_weights_per_replica = tf.reduce_sum(cls_weights) + replica_context = tf.distribute.get_replica_context() + num_masks_sum, cls_weights_sum = replica_context.all_reduce(tf.distribute.ReduceOp.SUM,[num_masks_per_replica, cls_weights_per_replica]) + + # Final losses + cls_loss = tf.math.divide_no_nan(tf.reduce_sum(cls_loss), cls_weights_sum) + losses = {'focal_loss' : 0.0, 'dice_loss': 0.0} + + + out_mask = tf.transpose(cls_masks, perm=[0,3,1,2]) + with tf.device(out_mask.device): + tgt_mask = individual_masks + + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) + focal_loss = FocalLossMod()(tgt_mask, out_mask) + dice_loss = DiceLoss()(tgt_mask, out_mask) + + + losses['focal_loss'] = focal_loss + losses['dice_loss'] = dice_loss + background_new = background + + focal_loss_weighted = tf.where(background_new, tf.zeros_like(focal_loss), focal_loss) + dice_loss_weighted = tf.where(background_new, tf.zeros_like(dice_loss), dice_loss) + focal_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(focal_loss_weighted), num_masks_sum) + dice_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(dice_loss_weighted), num_masks_sum) + + + return cls_loss, focal_loss_final, dice_loss_final + + def __call__(self, outputs, y_true): + """ + This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + y_true: list of dicts, such that len(y_true) == batch_size. 
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} + batch_size, num_queries = outputs["pred_logits"].shape[:2] + indices = self.memory_efficient_matcher(outputs_without_aux, y_true) # (batchsize, num_queries, num_queries) + + losses = {} + + cls_loss_final, focal_loss_final, dice_loss_final = self.get_loss(batch_size, outputs, y_true, indices) + + losses.update({"loss_ce": self.cost_class*cls_loss_final, + "loss_focal": self.cost_focal*focal_loss_final, + "loss_dice": self.cost_dice*dice_loss_final}) + + # if "aux_outputs" in outputs and outputs["aux_outputs"] is not None: + # for i, aux_outputs in enumerate(outputs["aux_outputs"]): + # indices = self.memory_efficient_matcher(aux_outputs, y_true) + # # for loss in self.losses: + # cls_loss_, focal_loss_, dice_loss_ = self.get_loss(batch_size, aux_outputs, y_true, indices) + + # l_dict = {"loss_ce" + f"_{i}": self.cost_class * cls_loss_, + # "loss_focal" + f"_{i}": self.cost_focal *focal_loss_, + # "loss_dice" + f"_{i}": self.cost_dice * dice_loss_} + # losses.update(l_dict) + + return losses + \ No newline at end of file diff --git a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-38.pyc b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..13503526 Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc index 60470171..eeec7296 100644 Binary files a/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/inference.cpython-38.pyc b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-38.pyc new file mode 100644 index 00000000..71ae6951 Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc new file mode 100644 index 00000000..271cf81c Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/inference.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc index 188ef731..a30798f0 100644 Binary files a/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc and b/models/official/projects/maskformer/losses/__pycache__/maskformer_losses.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/__pycache__/matchers.cpython-39.pyc b/models/official/projects/maskformer/losses/__pycache__/matchers.cpython-39.pyc new file mode 100644 index 00000000..93ebac11 Binary files /dev/null and b/models/official/projects/maskformer/losses/__pycache__/matchers.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/losses/inference.py b/models/official/projects/maskformer/losses/inference.py index 6b326e13..56cb237a 100644 --- 
a/models/official/projects/maskformer/losses/inference.py +++ b/models/official/projects/maskformer/losses/inference.py @@ -1,13 +1,23 @@ import tensorflow as tf class PanopticInference(): - def call(self, mask_true, mask_pred): + def call(self, mask_true, mask_pred, image_shape, num_classes = 133): + + interpolate = tf.keras.layers.Resizing( + image_shape[1], image_shape[2], interpolation = "bilinear") + #permute = tf.keras.layers.Permute((3, 1, 2)) + #mask_pred = permute(mask_pred) + #print(mask_pred.shape) + mask_pred = interpolate(mask_pred) + #permute = tf.keras.layers.Permute((2, 3, 1)) + #mask_pred = permute(mask_pred) + probs = tf.keras.activations.softmax(mask_true, axis=-1) scores = tf.reduce_max(probs, axis=-1) labels = tf.argmax(probs, axis=-1) mask_pred = tf.keras.activations.sigmoid(mask_pred) - config_num_classes = 171 + config_num_classes = num_classes object_mask_threshold = 0.0 keep = tf.math.logical_and(tf.math.not_equal(labels, config_num_classes), scores > object_mask_threshold) curr_scores = scores[keep] @@ -65,3 +75,4 @@ def call(self, mask_true, mask_pred): return panoptic_seg, segments_info + diff --git a/models/official/projects/maskformer/losses/inference_test.py b/models/official/projects/maskformer/losses/inference_test.py old mode 100644 new mode 100755 index afa1e412..7bcc9b22 --- a/models/official/projects/maskformer/losses/inference_test.py +++ b/models/official/projects/maskformer/losses/inference_test.py @@ -1,17 +1,20 @@ from absl.testing import parameterized import tensorflow as tf - +import sys +sys.path.append("/home/isaacjaeminin/inference/tf-maskformer/models") from official.projects.maskformer.losses.inference import PanopticInference -from official.projects.maskformer.maskformer import MaskFormer +from official.projects.maskformer.modeling.maskformer import MaskFormer class PanopticInferenceTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('test1',)) def test_pass_through(self): - model = MaskFormer() + input_specs = tf.keras.layers.InputSpec(shape=[None] + + [640, 640, 3]) + model = MaskFormer(input_specs = input_specs) input_image = tf.ones((1, 640, 640, 3)) - expected_class_probs_shape = [1, 100, 172] + expected_class_probs_shape = [1, 100, 134] expected_mask_probs_shape = [1, 160, 160, 100] output = model(input_image) @@ -19,8 +22,9 @@ def test_pass_through(self): output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) self.assertAllEqual( output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) - - out = PanopticInference().call(mask_true=output["class_prob_predictions"], mask_pred=output["mask_prob_predictions"]) + print(input_image.shape[1]) + print(input_image.shape[2]) + out = PanopticInference().call(mask_true=output["class_prob_predictions"], mask_pred=output["mask_prob_predictions"],image_shape = input_image.shape, num_classes=133) print(out) diff --git a/models/official/projects/maskformer/losses/maskformer_losses.py b/models/official/projects/maskformer/losses/maskformer_losses.py index 92a31a24..4bfe5edd 100644 --- a/models/official/projects/maskformer/losses/maskformer_losses.py +++ b/models/official/projects/maskformer/losses/maskformer_losses.py @@ -1,8 +1,63 @@ import tensorflow as tf - from official.vision.losses import focal_loss +from official.projects.detr.ops import matchers +from loguru import logger +tf.compat.v1.enable_eager_execution() + +def _max_by_axis(the_list): + all_max = the_list[0] + for sublist in the_list[1:]: + for idx, item in 
enumerate(sublist): + all_max[idx] = max(all_max[idx], item) + return all_max + +class NestedTensor(object): + def __init__(self, tensors, mask=None): + self.tensors = tf.convert_to_tensor(tensors) + self.mask = tf.convert_to_tensor(mask) if mask is not None else None + + def to(self, device): + with tf.device(device): + cast_tensor = tf.identity(self.tensors) + cast_mask = tf.identity(self.mask) if self.mask is not None else None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list): + if tf.rank(tensor_list[0]).numpy() == 3: + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + + dtype = tensor_list[0].dtype + device = tensor_list[0].device -class FocalLoss(focal_loss.FocalLoss): + with tf.device(device): + tensor = tf.zeros(batch_shape, dtype=dtype) + mask = tf.ones((b, h, w), dtype=tf.bool) + + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img = tf.Variable(pad_img) + pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].assign(img) + pad_img = tf.convert_to_tensor(pad_img) + + m = tf.Variable(m) + false_tensor = tf.zeros((img.shape[1], img.shape[2]), dtype=tf.bool) + m[:img.shape[1], :img.shape[2]].assign(false_tensor) + m = tf.convert_to_tensor(m) + return NestedTensor(tensor, mask) + else: + raise ValueError("not supported") + + +class FocalLossMod(focal_loss.FocalLoss): """Implements a Focal loss for segmentation problems. Reference: [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278). @@ -15,9 +70,10 @@ def __init__(self, alpha=0.25, gamma=2): gamma: The `gamma` focusing parameter to re-weight loss. reduction and name? """ - super().__init__(alpha, gamma) + super().__init__(alpha, gamma, reduction='none') + # self.background_indices = background_indices - def call(self, y_true, y_pred, num_masks): + def call(self, y_true, y_pred): """Invokes the `FocalLoss`. Args: y_true: A tensor of size [batch, num_anchors, num_classes]. @@ -29,178 +85,208 @@ def call(self, y_true, y_pred, num_masks): Returns: Loss float `Tensor`. 
""" + # background_indices = tf.expand_dims(self.background_indices, axis=0) weighted_loss = super().call(y_true, y_pred) - loss = tf.math.reduce_sum(tf.math.reduce_mean(weighted_loss,axis=1)) / num_masks + # print("weighted loss :", weighted_loss.shape) #(1, 100, 442368) + # mean over all pixels + loss = tf.math.reduce_mean(weighted_loss, axis=-1) + # logger.debug("loss shape: {}".format(loss.shape)) + # logger.debug("loss: {}".format(loss)) return loss + def batch(self, y_true, y_pred): + """ + y_true: (b_size, 100 (num objects), h*w) + y_pred: (b_size, 100 (num objects), h*w) + """ + hw = tf.cast(tf.shape(y_pred)[-1], dtype=tf.float32) #[100, h, w] + prob = tf.keras.activations.sigmoid(y_pred) + focal_pos = tf.pow(1 - prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(y_pred), logits=y_pred) + focal_neg = tf.pow(prob, self._gamma) * tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(y_pred), logits=y_pred) + + if self._alpha >= 0: + focal_pos = focal_pos * self._alpha + focal_neg = focal_neg * (1 - self._alpha) + loss = tf.einsum("bnc,bmc->bnm", focal_pos, y_true) + tf.einsum( + "bnc,bmc->bnm", focal_neg, (1 - y_true) + ) + return loss / hw + + + class DiceLoss(tf.keras.losses.Loss): - # TODO: figure out dice loss stuff - def call(self, y_true, y_pred, num_masks): - y_pred = tf.keras.activations.sigmoid(y_pred).reshape(-1) - y_true = tf.keras.activations.flatten(y_true) - numerator = 2 * tf.reduce_sum(y_pred * y_true, axis=1) - denominator = tf.reduce_sum(y_pred, axis=1) + tf.reduce_sum(y_true, axis=1) + + def __init__(self): + super().__init__(reduction='none') + + def call(self, y_true, y_pred): + """ + y_true: (b size, 100, h*w) + """ + + y_pred = tf.reshape(tf.keras.activations.sigmoid(y_pred), (y_pred.shape[0],y_pred.shape[1],-1)) + y_true = tf.reshape(y_true, (y_true.shape[0],tf.shape(y_true)[1],-1)) + + numerator = 2 * tf.reduce_sum(y_pred * y_true, axis=-1) + denominator = tf.reduce_sum(y_pred, axis=-1) + tf.reduce_sum(y_true, axis=-1) loss = 1 - (numerator + 1) / (denominator + 1) - return tf.reduce_sum(loss) / num_masks + + return loss + + def batch(self, y_true, y_pred): + # y_pred = tf.keras.activations.sigmoid(y_pred) + y_pred = tf.sigmoid(y_pred) + y_pred = tf.reshape(y_pred, [y_pred.shape[0], -1, y_pred.shape[1]]) + y_pred = tf.transpose(y_pred, [0, 2, 1]) + + numerator = 2 * tf.einsum("bnc,bmc->bnm", y_pred, y_true) + denominator = tf.reduce_sum(y_pred, axis=-1)[:, tf.newaxis] + tf.expand_dims(tf.reduce_sum(y_true, axis=-1), axis=-1) + -class Loss(): - def __init__(self, num_classes, similarity_calc, matcher, weight_dict, eos_coef, losses): + loss = 1 - (numerator + 1) / (denominator + 1) + + return loss + +class Loss: + def __init__(self, num_classes, matcher, eos_coef, cost_class = 1, cost_focal = 1, cost_dice = 1): + self.num_classes = num_classes - self.similarity_calc = similarity_calc self.matcher = matcher - self.weight_dict = weight_dict self.eos_coef = eos_coef - self.losses = losses - empty_weight = tf.ones(self.num_classes + 1) - empty_weight = tf.tensor_scatter_nd_update(empty_weight, [[self.num_classes]], [self.eos_coef]) - self.empty_weight = tf.Variable(empty_weight, trainable=False, name='empty_weight') - - def _get_pred_permutation_idx(self, indices): - batch_idx = tf.concat([tf.fill(pred,i) for i, (pred,_) in enumerate(indices)], axis=0) - pred_idx = tf.concat([pred for (pred,) in indices], axis=0) - return batch_idx, pred_idx - - def _get_true_permutation_idx(self, indices): - batch_idx = 
tf.concat([tf.fill(true,i) for i, (_,true) in enumerate(indices)], axis=0) - true_idx = tf.concat([true for (_,true) in indices], axis=0) - return batch_idx, true_idx - - def get_loss(self, loss, outputs, y_true, indices, num_masks): - loss_map = {"labels": ClassificationLoss().call, "masks": MaskLoss().call} - assert loss in loss_map - return loss_map[loss](outputs, y_true, indices, num_masks) + self.cost_class = cost_class + self.cost_focal = cost_focal + self.cost_dice = cost_dice - def call(self, outputs, y_true): - """This performs the loss computation. - Parameters: - outputs: dict of tensors, see the output specification of the model for the format - y_true: list of dicts, such that len(targets) == batch_size. - The expected keys in each dict depends on the losses applied, see each loss' doc - """ - outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} - # TODO: check matcher doc - match_quality_matrix = self.similarity_calc.compare(outputs_without_aux, y_true) - # indices = self.matcher._match(outputs_without_aux, y_true) - groundtruth_weights = tf.ones(match_quality_matrix.shape[0], dtype=tf.float32) - self.matcher._match(match_quality_matrix, y_true) - # - - num_masks = sum(len(t["labels"]) for t in y_true) - num_masks = tf.convert_to_tensor([num_masks], dtype=tf.float64) # device? + + def memory_efficient_matcher(self, outputs, y_true): + batch_size, num_queries = outputs["pred_logits"].shape[:2] + out_mask = outputs["pred_masks"] + out_mask = tf.transpose(out_mask, perm=[0,3,1,2]) - if Utils.is_dist_avail_and_initialized(): - num_masks = tf.distribute.get_strategy().reduce(tf.distribute.ReduceOp.SUM, num_masks, axis=None) - num_masks = tf.maximum(num_masks / tf.distribute.get_strategy().num_replicas_in_sync, 1.0) - - losses = {} - for loss in self.losses: - losses.update(self.get_loss(loss, outputs, y_true, indices, num_masks)) + tgt_ids = tf.cast(y_true["unique_ids"], dtype=tf.int64) - if "aux_outputs" in outputs: - for i, aux_outputs in enumerate(outputs["aux_outputs"]): - indices = self.matcher(aux_outputs, y_true) - for loss in self.losses: - l_dict = self.get_loss(loss, aux_outputs, y_true, indices, num_masks) - l_dict = {k + f"_{i}": v for k, v in l_dict.items()} - losses.update(l_dict) + with tf.device(out_mask.device): + tgt_mask = y_true["individual_masks"] + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) # [b, 100, h, w, 1] + cost_class = tf.gather(-tf.nn.softmax(outputs["pred_logits"]), tgt_ids, batch_dims=1, axis=-1) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) - return losses - + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) # [b, 100, h*w] + + cost_focal = FocalLossMod().batch(tgt_mask, out_mask) + cost_dice = DiceLoss().batch(tgt_mask, out_mask) + + total_cost = ( + self.cost_focal * cost_focal + + self.cost_class * cost_class + + self.cost_dice * cost_dice + ) + + _, inds = matchers.hungarian_matching(total_cost) + return inds -class ClassificationLoss(Loss): - def call(self, outputs, y_true, indices, num_masks): - assert "pred_logits" in outputs + + + def get_loss(self, batch_size, outputs, y_true, indices): + + target_index = tf.math.argmax(indices, axis=1) #[batchsize, 100] + target_labels = 
y_true["unique_ids"] #[batchsize, num_gt_objects] + cls_outputs = outputs["pred_logits"] # [batchsize, num_queries, num_classes] [1,100,134] + cls_masks = outputs["pred_masks"]# [batchsize, h, w, num_queries] + individual_masks = y_true["individual_masks"] # [batchsize, num_gt_objects, h, w, 1] - pred_logits = outputs["pred_logits"] + - idx = super()._get_pred_permutation_idx(indices) - true_classes_o = tf.concat([t["labels"][J] for t, (_, J) in zip(y_true, indices)], axis=0) + cls_assigned = tf.gather(cls_outputs, target_index, batch_dims=1, axis=1) + mask_assigned = tf.gather(cls_masks, target_index, batch_dims=1, axis=1) - with tf.device(pred_logits.device): - true_classes = tf.cast(tf.fill(pred_logits.shape[:2], super().num_classes), dtype=tf.int64) # device? - true_classes = tf.tensor_scatter_nd_update(true_classes, tf.expand_dims(idx, axis=1), true_classes_o) + target_classes = tf.cast(target_labels, dtype=tf.int32) + background = tf.equal(target_classes, 0) # Pytorch padds 133 class number where classes are background + + num_masks = tf.reduce_sum(tf.cast(tf.logical_not(background), tf.float32), axis=-1) + ######################################################################################################## + # TODO: check if we need this! + # if Utils.is_dist_avail_and_initialized(): + # num_masks = tf.distribute.get_strategy().reduce(tf.distribute.ReduceOp.SUM, num_masks, axis=None) + # num_masks = tf.maximum(num_masks / tf.distribute.get_strategy().num_replicas_in_sync, 1.0) + ######################################################################################################### + + xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_classes, logits=cls_assigned) + cls_loss = self.cost_class * tf.where(background, 0.1 * xentropy, xentropy) + cls_weights = tf.where(background, 0.1 * tf.ones_like(cls_loss), tf.ones_like(cls_loss)) + + num_masks_per_replica = tf.reduce_sum(num_masks) + cls_weights_per_replica = tf.reduce_sum(cls_weights) + replica_context = tf.distribute.get_replica_context() + num_masks_sum, cls_weights_sum = replica_context.all_reduce(tf.distribute.ReduceOp.SUM,[num_masks_per_replica, cls_weights_per_replica]) + + # Final losses + cls_loss = tf.math.divide_no_nan(tf.reduce_sum(cls_loss), cls_weights_sum) + losses = {'focal_loss' : 0.0, 'dice_loss': 0.0} + + + out_mask = tf.transpose(cls_masks, perm=[0,3,1,2]) + with tf.device(out_mask.device): + tgt_mask = individual_masks - # loss_ce = tf.nn.softmax_cross_entropy_with_logits(y_true, tf.transpose(pred_logits,(1,2))) - # loss_ce = tf.nn.weighted_cross_entropy_with_logits(y_true, tf.transpose(pred_logits,(1,2)), super().empty_weight) - loss_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_classes, logits=tf.transpose(pred_logits, [0, 2, 1])) - weighted_loss_ce = tf.reduce_mean(tf.multiply(loss_ce, super().empty_weight)) - losses = {"loss_ce": weighted_loss_ce} - return losses + tgt_mask = tf.transpose(tgt_mask, perm=[0,2,3,1,4]) + tgt_mask = tf.squeeze(tf.cast(tgt_mask, dtype=tf.float32),axis=-1) + tgt_mask = tf.image.resize(tgt_mask, out_mask.shape[-2:], method='bilinear') # [b, h, w, 100] + out_mask = tf.transpose(out_mask, perm=[0,2,3,1]) # [b, h, w, 100] + + out_mask = tf.reshape(out_mask, [tf.shape(out_mask)[0], tf.shape(out_mask)[-1], -1]) # [b, 100, h*w] + tgt_mask = tf.reshape(tgt_mask, [tf.shape(tgt_mask)[0],tf.shape(tgt_mask)[-1], -1]) + focal_loss = FocalLossMod()(tgt_mask, out_mask) + dice_loss = DiceLoss()(tgt_mask, out_mask) + + + losses['focal_loss'] = focal_loss 
+ losses['dice_loss'] = dice_loss + background_new = background -class MaskLoss(Loss): - def call(self, outputs, y_true, indices, num_masks): - assert "pred_masks" in outputs - - pred_idx = super()._get_pred_permutation_idx(indices) - true_idx = super()._get_true_permutation_idx(indices) - pred_masks = outputs["pred_masks"] - pred_masks = pred_masks[pred_idx] - masks = [t["masks"] for t in y_true] - - true_masks, valid = Utils.nested_tensor_from_tensor_list(masks).decompose() - # true_masks = tf.cast(true_masks, pred_masks.dtype) # device? - true_masks = true_masks.to(pred_masks) - true_masks = true_masks[true_idx] - - pred_masks = tf.image.resize(pred_masks[..., tf.newaxis], true_masks.shape[-2:], method='bilinear', align_corners=False)[..., 0] - pred_masks = tf.reshape(pred_masks[:, 0], -1) - - true_masks = tf.reshape(true_masks, -1) - true_masks = tf.reshape(true_masks, pred_masks.shape) - losses = { - "loss_mask": FocalLoss().call(pred_masks, true_masks, num_masks), - "loss_dice": DiceLoss().call(pred_masks, true_masks, num_masks) - } - return losses + focal_loss_weighted = tf.where(background_new, tf.zeros_like(focal_loss), focal_loss) + dice_loss_weighted = tf.where(background_new, tf.zeros_like(dice_loss), dice_loss) + focal_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(focal_loss_weighted), num_masks_sum) + dice_loss_final = tf.math.divide_no_nan(tf.math.reduce_sum(dice_loss_weighted), num_masks_sum) -class Utils(): - def _max_by_axis(the_list): - all_max = the_list[0] - for sublist in the_list[1:]: - for idx, item in enumerate(sublist): - all_max[idx] = max(all_max[idx], item) - return all_max - - class NestedTensor(object): - def __init__(self, tensors, mask=None): - self.tensors = tf.convert_to_tensor(tensors) - self.mask = tf.convert_to_tensor(mask) if mask is not None else None - - def to(self, device): - # type: (Device) -> NestedTensor # noqa - with tf.device(device): - cast_tensor = tf.identity(self.tensors) - cast_mask = tf.identity(self.mask) if self.mask is not None else None - return NestedTensor(cast_tensor, cast_mask) - - def decompose(self): - return self.tensors, self.mask - - def __repr__(self): - return str(self.tensors) - - def nested_tensor_from_tensor_list(tensor_list): - if tf.rank(tensor_list[0]).numpy() == 3: - # TODO: figure out ONNX stuff - # if tf.executing_eagerly(): - # return _onnx_nested_tensor_from_tensor_list(tensor_list) - - max_size = tf.reduce_max([tf.shape(img) for img in tensor_list], axis=0) - batch_shape = tf.concat([[len(tensor_list)], max_size], axis=0) - batch_size, num_channels, height, width = batch_shape - with tf.device(tensor_list[0].device): - tensor = tf.zeros(batch_shape, dtype=tensor_list[0].dtype) - mask = tf.ones((batch_size, height, width), dtype=tf.bool_) - for img, pad_img, m in zip(tensor_list, tensor, mask): - pad_img[:img.shape[0], :img.shape[1], :img.shape[2]].assign(img) - m[:img.shape[1], :img.shape[2]].assign(False) - else: - raise ValueError("not supported") - return NestedTensor(tensor, mask) + + return cls_loss, focal_loss_final, dice_loss_final - def is_dist_avail_and_initialized(): - if not tf.distribute.has_strategy(): - return False - if not tf.distribute.in_cross_replica_context(): - return False - return True \ No newline at end of file + def __call__(self, outputs, y_true): + """ + This performs the loss computation. + Parameters: + outputs: dict of tensors, see the output specification of the model for the format + y_true: list of dicts, such that len(y_true) == batch_size. 
+ The expected keys in each dict depends on the losses applied, see each loss' doc + """ + outputs_without_aux = {k: v for k, v in outputs.items() if k != "aux_outputs"} + batch_size, num_queries = outputs["pred_logits"].shape[:2] + indices = self.memory_efficient_matcher(outputs_without_aux, y_true) # (batchsize, num_queries, num_queries) + + losses = {} + + cls_loss_final, focal_loss_final, dice_loss_final = self.get_loss(batch_size, outputs, y_true, indices) + + losses.update({"loss_ce": self.cost_class*cls_loss_final, + "loss_focal": self.cost_focal*focal_loss_final, + "loss_dice": self.cost_dice*dice_loss_final}) + + # if "aux_outputs" in outputs and outputs["aux_outputs"] is not None: + # for i, aux_outputs in enumerate(outputs["aux_outputs"]): + # indices = self.memory_efficient_matcher(aux_outputs, y_true) + # # for loss in self.losses: + # cls_loss_, focal_loss_, dice_loss_ = self.get_loss(batch_size, aux_outputs, y_true, indices) + + # l_dict = {"loss_ce" + f"_{i}": self.cost_class * cls_loss_, + # "loss_focal" + f"_{i}": self.cost_focal *focal_loss_, + # "loss_dice" + f"_{i}": self.cost_dice * dice_loss_} + # losses.update(l_dict) + + return losses + \ No newline at end of file diff --git a/models/official/projects/maskformer/losses/maskformer_losses_test.py b/models/official/projects/maskformer/losses/maskformer_losses_test.py index ce72dcd9..1bbfb391 100644 --- a/models/official/projects/maskformer/losses/maskformer_losses_test.py +++ b/models/official/projects/maskformer/losses/maskformer_losses_test.py @@ -1,36 +1,73 @@ from official.projects.maskformer.losses.maskformer_losses import Loss -from research.object_detection.matchers.hungarian_matcher import HungarianBipartiteMatcher -from research.object_detection.core.region_similarity_calculator import DETRSimilarity +from official.projects.detr.ops.matchers import hungarian_matching from absl.testing import parameterized import tensorflow as tf -import torch + +import numpy as np import pickle class LossTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(('test1',)) def test_pass_through(self): - similarity_calc = DETRSimilarity() - matcher = HungarianBipartiteMatcher() - mask_weight = 20.0 - dice_weight = 1.0 + matcher = hungarian_matching no_object_weight = 0.1 - weight_dict = {"loss_ce":1, "loss_mask": mask_weight, "loss_dice": dice_weight} + losses = ["labels", "masks"] - + self.weight_dict = { + "ce_loss" : 1.0, + "focal_loss" : 20.0, + "dice_loss" : 1.0, + } loss = Loss( - num_classes = 171, - similarity_calc = similarity_calc, + num_classes = 133, matcher = matcher, - weight_dict = weight_dict, eos_coef = no_object_weight, - losses = losses + cost_class= 1.0, + cost_dice= 1.0, + cost_focal=20.0 ) - with open("losses_test.pkl", "rb") as f: - params = pickle.load(f) - - print(loss.call(params["outputs"], params["targets"])) + # outputs = {"pred_logits":tf.convert_to_tensor(np.load("output_pred_logits.npy")), "pred_masks":tf.convert_to_tensor(np.load("output_pred_masks.npy"))} + # print(f"outputs['pred_logits'] shape is {outputs['pred_logits'].shape}") + # print(f"outputs['pred_masks'] shape is {outputs['pred_masks'].shape}") + + main_pth = "/depot/qqiu/data/vishal/projects/tf-maskformer/models/official/projects/maskformer/losses" + aux_out_0 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits0.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks0.npy"))} + aux_out_1 = {"pred_logits" : 
tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits1.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks1.npy"))} + aux_out_2 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits2.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks2.npy"))} + aux_out_3 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits3.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks3.npy"))} + aux_out_4 = {"pred_logits" : tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_logits4.npy")), "pred_masks": tf.convert_to_tensor(np.load(main_pth+"/tensors/aux_outputs_pred_masks4.npy"))} + aux_outputs = [aux_out_0, aux_out_1, aux_out_2, aux_out_3, aux_out_4] + pred_logits_load = tf.convert_to_tensor(np.load(main_pth+"/tensors/output_pred_logits.npy")) + pred_masks_load = tf.convert_to_tensor(np.load(main_pth+"/tensors/output_pred_masks.npy")) + outputs = { + "pred_logits": pred_logits_load, + "pred_masks": pred_masks_load, + "aux_outputs": aux_outputs + } + + # Load the new_targets_dict NumPy array + targets = [] + # TODO : Caution the below loop is for each image in the batch + for i in range(2): # Here 2 is for batch size + targets.append( + { + "labels": tf.convert_to_tensor(np.load(main_pth+'/tensors/targets_labels_'+str(i)+'.npy')), + "masks": tf.convert_to_tensor(np.load(main_pth+'/tensors/targets_masks_'+str(i)+'.npy')), + } + ) + + + losses = loss(outputs, targets) + + + print("Losses are : ", losses) + print("Total Loss is :", losses['loss_ce'] + losses['loss_dice'] + losses['loss_focal']) + # for i in range(4): + # print(f"Total aux Loss {i} : losses['loss_ce_'+{str(i)}] + losses['loss_dice_'+{str(i)}] + losses['loss_focal_'+{str(i)}]") + # TODO: Check if this is correct + # self.assertAllEqual(losses, ) if __name__ == '__main__': tf.test.main() \ No newline at end of file diff --git a/models/official/projects/maskformer/losses/matchers.py b/models/official/projects/maskformer/losses/matchers.py new file mode 100644 index 00000000..6d57117f --- /dev/null +++ b/models/official/projects/maskformer/losses/matchers.py @@ -0,0 +1,492 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tensorflow implementation to solve the Linear Sum Assignment problem. + +The Linear Sum Assignment problem involves determining the minimum weight +matching for bipartite graphs. For example, this problem can be defined by +a 2D matrix C, where each element i,j determines the cost of matching worker i +with job j. The solution to the problem is a complete assignment of jobs to +workers, such that no job is assigned to more than one work and no worker is +assigned more than one job, with minimum cost. + +This implementation builds off of the Hungarian +Matching Algorithm (https://www.cse.ust.hk/~golin/COMP572/Notes/Matching.pdf). 
+ +Based on the original implementation by Jiquan Ngiam. +""" +import tensorflow as tf +from official.modeling import tf_utils +def _prepare(weights): + """Prepare the cost matrix. + + To improve the computational efficiency of the algorithm, all weights are shifted + to be non-negative. Each element is reduced by the row / column minimum. Note + that neither operation will affect the resulting solution but will provide + a better starting point for the greedy assignment. Note this corresponds to + the pre-processing and step 1 of the Hungarian algorithm from Wikipedia. + + Args: + weights: A float32 [batch_size, num_elems, num_elems] tensor, where each + inner matrix represents weights to be used for matching. + + Returns: + A prepared weights tensor of the same shape and dtype. + """ + # Since every worker needs a job and every job needs a worker, we can subtract + # the minimum from each. + weights -= tf.reduce_min(weights, axis=2, keepdims=True) + weights -= tf.reduce_min(weights, axis=1, keepdims=True) + return weights + + +def _greedy_assignment(adj_matrix): + """Greedily assigns workers to jobs based on an adjacency matrix. + + Starting with an adjacency matrix representing the available connections + in the bi-partite graph, this function greedily chooses elements such + that each worker is matched to at most one job (or each job is assigned to + at most one worker). Note, if the adjacency matrix has no available values + for a particular row/column, the corresponding job/worker may go unassigned. + + Args: + adj_matrix: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker (row) can be + matched to the job (column). + + Returns: + A bool [batch_size, num_elems, num_elems] tensor, where each element of the + inner matrix represents whether the worker has been matched to the job. + Each row and column can have at most one true element. Some of the rows + and columns may not be matched. + """ + + _, num_elems, _ = tf_utils.get_shape_list(adj_matrix, expected_rank=3) + adj_matrix = tf.transpose(adj_matrix, [1, 0, 2]) + + # Create a dynamic TensorArray containing the assignments for each worker/job + assignment = tf.TensorArray(tf.bool, num_elems) + + # Store the elements assigned to each column to update each iteration + col_assigned = tf.zeros_like(adj_matrix[0, ...], dtype=tf.bool) + + # Iteratively assign each row using tf.foldl. Intuitively, this is a loop + # over rows, where we incrementally assign each row. + def _assign_row(accumulator, row_adj): + # The accumulator tracks the row assignment index. + idx, assignment, col_assigned = accumulator + + # Viable candidates cannot already be assigned to another job. + candidates = row_adj & (~col_assigned) + + # Deterministically assign to the candidates of the highest index count. + max_candidate_idx = tf.argmax( + tf.cast(candidates, tf.int32), axis=1, output_type=tf.int32) + + candidates_indicator = tf.one_hot( + max_candidate_idx, + num_elems, + on_value=True, + off_value=False, + dtype=tf.bool) + candidates_indicator &= candidates + # Make assignment to the column.
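# Reference sketch (illustrative, not part of the patch): the greedy seeding performed by
# _greedy_assignment / _assign_row above, written as a plain-Python loop over a single,
# made-up adjacency matrix. Each worker row claims the lowest-index still-unclaimed job it
# is adjacent to; rows with no remaining candidates stay unmatched.
import numpy as np

adj = np.array([[1, 0, 1],
                [1, 1, 0],
                [0, 1, 0]], dtype=bool)
assignment = np.zeros_like(adj)
col_taken = np.zeros(adj.shape[1], dtype=bool)
for i, row in enumerate(adj):
    candidates = row & ~col_taken
    if candidates.any():
        j = int(np.argmax(candidates))   # first True, i.e. the lowest-index free column
        assignment[i, j] = True
        col_taken[j] = True
# Result: worker 0 -> job 0, worker 1 -> job 1, worker 2 is left unmatched.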
+ col_assigned |= candidates_indicator + assignment = assignment.write(idx, candidates_indicator) + + return (idx + 1, assignment, col_assigned) + + _, assignment, _ = tf.foldl( + _assign_row, adj_matrix, (0, assignment, col_assigned), back_prop=False) + + assignment = assignment.stack() + assignment = tf.transpose(assignment, [1, 0, 2]) + return assignment + + +def _find_augmenting_path(assignment, adj_matrix): + """Finds an augmenting path given an assignment and an adjacency matrix. + + The augmenting path search starts from the unassigned workers, then goes on + to find jobs (via an unassigned pairing), then back again to workers (via an + existing pairing), and so on. The path alternates between unassigned and + existing pairings. Returns the state after the search. + + Note: In the state the worker and job, indices are 1-indexed so that we can + use 0 to represent unreachable nodes. State contains the following keys: + + - jobs: A [batch_size, 1, num_elems] tensor containing the highest index + unassigned worker that can reach this job through a path. + - jobs_from_worker: A [batch_size, num_elems] tensor containing the worker + reached immediately before this job. + - workers: A [batch_size, num_elems, 1] tensor containing the highest index + unassigned worker that can reach this worker through a path. + - workers_from_job: A [batch_size, num_elems] tensor containing the job + reached immediately before this worker. + - new_jobs: A bool [batch_size, num_elems] tensor containing True if the + unassigned job can be reached via a path. + + State can be used to recover the path via backtracking. + + Args: + assignment: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker has been matched + to the job. This may be a partial assignment. + adj_matrix: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker (row) can be + matched to the job (column). + + Returns: + A state dict, which represents the outcome of running an augmenting + path search on the graph given the assignment. + """ + batch_size, num_elems, _ = tf_utils.get_shape_list( + assignment, expected_rank=3) + unassigned_workers = ~tf.reduce_any(assignment, axis=2, keepdims=True) + unassigned_jobs = ~tf.reduce_any(assignment, axis=1, keepdims=True) + + unassigned_pairings = tf.cast(adj_matrix & ~assignment, tf.int32) + existing_pairings = tf.cast(assignment, tf.int32) + + # Initialize unassigned workers to have non-zero ids, assigned workers will + # have ids = 0. + worker_indices = tf.range(1, num_elems + 1, dtype=tf.int32) + init_workers = tf.tile(worker_indices[tf.newaxis, :, tf.newaxis], + [batch_size, 1, 1]) + init_workers *= tf.cast(unassigned_workers, tf.int32) + + state = { + "jobs": tf.zeros((batch_size, 1, num_elems), dtype=tf.int32), + "jobs_from_worker": tf.zeros((batch_size, num_elems), dtype=tf.int32), + "workers": init_workers, + "workers_from_job": tf.zeros((batch_size, num_elems), dtype=tf.int32) + } + + def _has_active_workers(state, curr_workers): + """Check if there are still active workers.""" + del state + return tf.reduce_sum(curr_workers) > 0 + + def _augment_step(state, curr_workers): + """Performs one search step.""" + + # Note: These steps could be potentially much faster if sparse matrices are + # supported. The unassigned_pairings and existing_pairings matrices can be + # very sparse. + + # Find potential jobs using current workers. 
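# Reference sketch (illustrative, not part of the patch): the reduction performed by
# _prepare above, worked through on the 3x3 cost matrix that matchers_test.py below also
# uses. After both subtractions every row and every column contains a zero; those zeros
# become the adjacency matrix that the greedy seeding and augmenting-path search act on.
import tensorflow as tf

weights = tf.constant([[[4., 1., 3.],
                        [2., 0., 5.],
                        [3., 2., 2.]]])
weights -= tf.reduce_min(weights, axis=2, keepdims=True)  # row minima:    [[3,0,2],[2,0,5],[1,0,0]]
weights -= tf.reduce_min(weights, axis=1, keepdims=True)  # column minima: [[2,0,2],[1,0,5],[0,0,0]]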
+ potential_jobs = curr_workers * unassigned_pairings + curr_jobs = tf.reduce_max(potential_jobs, axis=1, keepdims=True) + curr_jobs_from_worker = 1 + tf.argmax( + potential_jobs, axis=1, output_type=tf.int32) + + # Remove already accessible jobs from curr_jobs. + default_jobs = tf.zeros_like(state["jobs"], dtype=state["jobs"].dtype) + curr_jobs = tf.where(state["jobs"] > 0, default_jobs, curr_jobs) + curr_jobs_from_worker *= tf.cast(curr_jobs > 0, tf.int32)[:, 0, :] + + # Find potential workers from current jobs. + potential_workers = curr_jobs * existing_pairings + curr_workers = tf.reduce_max(potential_workers, axis=2, keepdims=True) + curr_workers_from_job = 1 + tf.argmax( + potential_workers, axis=2, output_type=tf.int32) + + # Remove already accessible workers from curr_workers. + default_workers = tf.zeros_like(state["workers"]) + curr_workers = tf.where( + state["workers"] > 0, default_workers, curr_workers) + curr_workers_from_job *= tf.cast(curr_workers > 0, tf.int32)[:, :, 0] + + # Update state so that we can backtrack later. + state = state.copy() + state["jobs"] = tf.maximum(state["jobs"], curr_jobs) + state["jobs_from_worker"] = tf.maximum(state["jobs_from_worker"], + curr_jobs_from_worker) + state["workers"] = tf.maximum(state["workers"], curr_workers) + state["workers_from_job"] = tf.maximum(state["workers_from_job"], + curr_workers_from_job) + + return state, curr_workers + + state, _ = tf.while_loop( + _has_active_workers, + _augment_step, (state, init_workers), + back_prop=False) + + # Compute new jobs, this is useful for determnining termnination of the + # maximum bi-partite matching and initialization for backtracking. + new_jobs = (state["jobs"] > 0) & unassigned_jobs + state["new_jobs"] = new_jobs[:, 0, :] + return state + + +def _improve_assignment(assignment, state): + """Improves an assignment by backtracking the augmented path using state. + + Args: + assignment: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker has been matched + to the job. This may be a partial assignment. + state: A dict, which represents the outcome of running an augmenting path + search on the graph given the assignment. + + Returns: + A new assignment matrix of the same shape and type as assignment, where the + assignment has been updated using the augmented path found. + """ + batch_size, num_elems, _ = tf_utils.get_shape_list(assignment, 3) + + # We store the current job id and iteratively backtrack using jobs_from_worker + # and workers_from_job until we reach an unassigned worker. We flip all the + # assignments on this path to discover a better overall assignment. + + # Note: The indices in state are 1-indexed, where 0 represents that the + # worker / job cannot be reached. + + # Obtain initial job indices based on new_jobs. + curr_job_idx = tf.argmax( + tf.cast(state["new_jobs"], tf.int32), axis=1, output_type=tf.int32) + + # Track whether an example is actively being backtracked. Since we are + # operating on a batch, not all examples in the batch may be active. + active = tf.gather(state["new_jobs"], curr_job_idx, batch_dims=1) + batch_range = tf.range(0, batch_size, dtype=tf.int32) + + # Flip matrix tracks which assignments we need to flip - corresponding to the + # augmenting path taken. We use an integer tensor here so that we can use + # tensor_scatter_nd_add to update the tensor, and then cast it back to bool + # after the loop. 
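# Reference sketch (illustrative, not part of the patch): the XOR update that
# _improve_assignment applies once an augmenting path has been found. Toggling the path
# edges removes the existing pairing on the path and adds the two new ones, growing the
# matching by one. The 2x2 instance is made up for illustration.
import tensorflow as tf

# Worker 0 is adjacent to jobs 0 and 1, worker 1 only to job 0; the partial matching
# pairs worker 0 with job 0 and leaves worker 1 unmatched.
assignment = tf.constant([[[True,  False],
                           [False, False]]])
# Augmenting path: worker 1 -> job 0 (new edge) -> worker 0 (existing edge) -> job 1 (new edge).
flip_matrix = tf.constant([[[True, True],
                            [True, False]]])
improved = tf.math.logical_xor(assignment, flip_matrix)
# improved pairs worker 0 with job 1 and worker 1 with job 0: a perfect matching.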
+ flip_matrix = tf.zeros((batch_size, num_elems, num_elems), dtype=tf.int32) + + def _has_active_backtracks(flip_matrix, active, curr_job_idx): + """Check if there are still active workers.""" + del flip_matrix, curr_job_idx + return tf.reduce_any(active) + + def _backtrack_one_step(flip_matrix, active, curr_job_idx): + """Take one step in backtracking.""" + # Discover the worker that the job originated from, note that this worker + # must exist by construction. + curr_worker_idx = tf.gather( + state["jobs_from_worker"], curr_job_idx, batch_dims=1) - 1 + curr_worker_idx = tf.maximum(curr_worker_idx, 0) + update_indices = tf.stack([batch_range, curr_worker_idx, curr_job_idx], + axis=1) + update_indices = tf.maximum(update_indices, 0) + flip_matrix = tf.tensor_scatter_nd_add(flip_matrix, update_indices, + tf.cast(active, tf.int32)) + + # Discover the (potential) job that the worker originated from. + curr_job_idx = tf.gather( + state["workers_from_job"], curr_worker_idx, batch_dims=1) - 1 + # Note that jobs may not be active, and we track that here (before + # adjusting indices so that they are all >= 0 for gather). + active &= curr_job_idx >= 0 + curr_job_idx = tf.maximum(curr_job_idx, 0) + update_indices = tf.stack([batch_range, curr_worker_idx, curr_job_idx], + axis=1) + update_indices = tf.maximum(update_indices, 0) + flip_matrix = tf.tensor_scatter_nd_add(flip_matrix, update_indices, + tf.cast(active, tf.int32)) + + return flip_matrix, active, curr_job_idx + + flip_matrix, _, _ = tf.while_loop( + _has_active_backtracks, + _backtrack_one_step, (flip_matrix, active, curr_job_idx), + back_prop=False) + + flip_matrix = tf.cast(flip_matrix, tf.bool) + assignment = tf.math.logical_xor(assignment, flip_matrix) + + return assignment + + +def _maximum_bipartite_matching(adj_matrix, assignment=None): + """Performs maximum bipartite matching using augmented paths. + + Args: + adj_matrix: A bool [batch_size, num_elems, num_elems] tensor, where each + element of the inner matrix represents whether the worker (row) can be + matched to the job (column). + assignment: An optional bool [batch_size, num_elems, num_elems] tensor, + where each element of the inner matrix represents whether the worker has + been matched to the job. This may be a partial assignment. If specified, + this assignment will be used to seed the iterative algorithm. + + Returns: + A state dict representing the final augmenting path state search, and + a maximum bipartite matching assignment tensor. Note that the state outcome + can be used to compute a minimum vertex cover for the bipartite graph. + """ + + if assignment is None: + assignment = _greedy_assignment(adj_matrix) + + state = _find_augmenting_path(assignment, adj_matrix) + + def _has_new_jobs(state, assignment): + del assignment + return tf.reduce_any(state["new_jobs"]) + + def _improve_assignment_and_find_new_path(state, assignment): + assignment = _improve_assignment(assignment, state) + state = _find_augmenting_path(assignment, adj_matrix) + return state, assignment + + state, assignment = tf.while_loop( + _has_new_jobs, + _improve_assignment_and_find_new_path, (state, assignment), + back_prop=False) + + return state, assignment + + +def _compute_cover(state, assignment): + """Computes a cover for the bipartite graph. + + We compute a cover using the construction provided at + https://en.wikipedia.org/wiki/K%C5%91nig%27s_theorem_(graph_theory)#Proof + which uses the outcome from the alternating path search. 
+ + Args: + state: A state dict, which represents the outcome of running an augmenting + path search on the graph given the assignment. + assignment: An optional bool [batch_size, num_elems, num_elems] tensor, + where each element of the inner matrix represents whether the worker has + been matched to the job. This may be a partial assignment. If specified, + this assignment will be used to seed the iterative algorithm. + + Returns: + A tuple of (workers_cover, jobs_cover) corresponding to row and column + covers for the bipartite graph. workers_cover is a boolean tensor of shape + [batch_size, num_elems, 1] and jobs_cover is a boolean tensor of shape + [batch_size, 1, num_elems]. + """ + assigned_workers = tf.reduce_any(assignment, axis=2, keepdims=True) + assigned_jobs = tf.reduce_any(assignment, axis=1, keepdims=True) + + reachable_workers = state["workers"] > 0 + reachable_jobs = state["jobs"] > 0 + + workers_cover = assigned_workers & (~reachable_workers) + jobs_cover = assigned_jobs & reachable_jobs + + return workers_cover, jobs_cover + + +def _update_weights_using_cover(workers_cover, jobs_cover, weights): + """Updates weights for hungarian matching using a cover. + + We first find the minimum uncovered weight. Then, we subtract this from all + the uncovered weights, and add it to all the doubly covered weights. + + Args: + workers_cover: A boolean tensor of shape [batch_size, num_elems, 1]. + jobs_cover: A boolean tensor of shape [batch_size, 1, num_elems]. + weights: A float32 [batch_size, num_elems, num_elems] tensor, where each + inner matrix represents weights to be use for matching. + + Returns: + A new weight matrix with elements adjusted by the cover. + """ + max_value = tf.reduce_max(weights) + + covered = workers_cover | jobs_cover + double_covered = workers_cover & jobs_cover + + uncovered_weights = tf.where(covered, + tf.ones_like(weights) * max_value, weights) + min_weight = tf.reduce_min(uncovered_weights, axis=[-2, -1], keepdims=True) + + add_weight = tf.where(double_covered, + tf.ones_like(weights) * min_weight, + tf.zeros_like(weights)) + sub_weight = tf.where(covered, tf.zeros_like(weights), + tf.ones_like(weights) * min_weight) + + return weights + add_weight - sub_weight + + +def assert_rank(tensor, expected_rank, name=None): + """Raises an exception if the tensor rank is not of the expected rank. + + Args: + tensor: A tf.Tensor to check the rank of. + expected_rank: Python integer or list of integers, expected rank. + name: Optional name of the tensor for the error message. + + Raises: + ValueError: If the expected shape doesn't match the actual shape. + """ + expected_rank_dict = {} + if isinstance(expected_rank, int): + expected_rank_dict[expected_rank] = True + else: + for x in expected_rank: + expected_rank_dict[x] = True + + actual_rank = len(tensor.shape) + if actual_rank not in expected_rank_dict: + raise ValueError( + "For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not " + "equal to the expected tensor rank `%s`" % + (name, actual_rank, str(tensor.shape), str(expected_rank))) + + +def hungarian_matching(weights): + """Computes the minimum linear sum assignment using the Hungarian algorithm. + + Args: + weights: A float32 [batch_size, num_elems, num_elems] tensor, where each + inner matrix represents weights to be use for matching. + + Returns: + A bool [batch_size, num_elems, num_elems] tensor, where each element of the + inner matrix represents whether the worker has been matched to the job. 
+ The returned matching will always be a perfect match. + """ + batch_size, num_elems, _ = tf_utils.get_shape_list(weights, 3) + + weights = _prepare(weights) + adj_matrix = tf.equal(weights, 0.) + state, assignment = _maximum_bipartite_matching(adj_matrix) + workers_cover, jobs_cover = _compute_cover(state, assignment) + + def _cover_incomplete(workers_cover, jobs_cover, *args): + del args + cover_sum = ( + tf.reduce_sum(tf.cast(workers_cover, tf.int32)) + + tf.reduce_sum(tf.cast(jobs_cover, tf.int32))) + return tf.less(cover_sum, batch_size * num_elems) + + def _update_weights_and_match(workers_cover, jobs_cover, weights, assignment): + weights = _update_weights_using_cover(workers_cover, jobs_cover, weights) + adj_matrix = tf.equal(weights, 0.) + state, assignment = _maximum_bipartite_matching(adj_matrix, assignment) + workers_cover, jobs_cover = _compute_cover(state, assignment) + return workers_cover, jobs_cover, weights, assignment + + workers_cover, jobs_cover, weights, assignment = tf.while_loop( + _cover_incomplete, + _update_weights_and_match, + (workers_cover, jobs_cover, weights, assignment), + back_prop=False) + return weights, assignment + diff --git a/models/official/projects/maskformer/losses/matchers_test.py b/models/official/projects/maskformer/losses/matchers_test.py new file mode 100644 index 00000000..87e27430 --- /dev/null +++ b/models/official/projects/maskformer/losses/matchers_test.py @@ -0,0 +1,94 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for tensorflow_models.official.projects.detr.ops.matchers.""" + +import numpy as np +from scipy import optimize +import tensorflow as tf + +# from official.projects.detr.ops import matchers +import matchers +class MatchersOpsTest(tf.test.TestCase): + + def testLinearSumAssignment(self): + """Check a simple 2D test case of the Linear Sum Assignment problem. + + Ensures that the implementation of the matching algorithm is correct + and functional on TPUs. + """ + cost_matrix = np.array([[[4, 1, 3], [2, 0, 5], [3, 2, 2]]], + dtype=np.float32) + _, adjacency_matrix = matchers.hungarian_matching(tf.constant(cost_matrix)) + adjacency_output = adjacency_matrix.numpy() + + correct_output = np.array([ + [0, 1, 0], + [1, 0, 0], + [0, 0, 1], + ], dtype=bool) + self.assertAllEqual(adjacency_output[0], correct_output) + + def testBatchedLinearSumAssignment(self): + """Check a batched case of the Linear Sum Assignment Problem. + + Ensures that a correct solution is found for all inputted problems within + a batch. 
+ """ + cost_matrix = np.array([ + [[4, 1, 3], [2, 0, 5], [3, 2, 2]], + [[1, 4, 3], [0, 2, 5], [2, 3, 2]], + [[1, 3, 4], [0, 5, 2], [2, 2, 3]], + ], + dtype=np.float32) + _, adjacency_matrix = matchers.hungarian_matching(tf.constant(cost_matrix)) + adjacency_output = adjacency_matrix.numpy() + + # Hand solved correct output for the linear sum assignment problem + correct_output = np.array([ + [[0, 1, 0], [1, 0, 0], [0, 0, 1]], + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[1, 0, 0], [0, 0, 1], [0, 1, 0]], + ], + dtype=bool) + self.assertAllClose(adjacency_output, correct_output) + + def testMaximumBipartiteMatching(self): + """Check that the maximum bipartite match assigns the correct numbers.""" + adj_matrix = tf.cast([[ + [1, 0, 0, 0, 1], + [0, 1, 0, 1, 0], + [0, 0, 1, 0, 0], + [0, 1, 0, 0, 0], + [1, 0, 0, 0, 0], + ]], tf.bool) + _, assignment = matchers._maximum_bipartite_matching(adj_matrix) + self.assertEqual(np.sum(assignment.numpy()), 5) + + def testAssignmentMatchesScipy(self): + """Check that the Linear Sum Assignment matches the Scipy implementation.""" + batch_size, num_elems = 2, 25 + weights = tf.random.uniform((batch_size, num_elems, num_elems), + minval=0., + maxval=1.) + weights, assignment = matchers.hungarian_matching(weights) + + for idx in range(batch_size): + _, scipy_assignment = optimize.linear_sum_assignment(weights.numpy()[idx]) + hungarian_assignment = np.where(assignment.numpy()[idx])[1] + + self.assertAllEqual(hungarian_assignment, scipy_assignment) + +if __name__ == '__main__': + tf.test.main() diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits0.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits0.npy new file mode 100644 index 00000000..c1a4f3d7 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits0.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits1.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits1.npy new file mode 100644 index 00000000..dbb88e0d Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits1.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits2.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits2.npy new file mode 100644 index 00000000..e45a60fa Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits2.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits3.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits3.npy new file mode 100644 index 00000000..831d1921 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits3.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits4.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits4.npy new file mode 100644 index 00000000..d3c21f71 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_logits4.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks0.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks0.npy new file mode 100644 index 00000000..f323e13e Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks0.npy 
differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks1.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks1.npy new file mode 100644 index 00000000..0adb02a8 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks1.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks2.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks2.npy new file mode 100644 index 00000000..cac4ef36 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks2.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks3.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks3.npy new file mode 100644 index 00000000..f82b3d88 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks3.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks4.npy b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks4.npy new file mode 100644 index 00000000..e0cf3356 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/aux_outputs_pred_masks4.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/images.npy b/models/official/projects/maskformer/losses/tensors/images.npy new file mode 100644 index 00000000..9a232454 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/images.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/output_pred_logits.npy b/models/official/projects/maskformer/losses/tensors/output_pred_logits.npy new file mode 100644 index 00000000..cfb07381 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/output_pred_logits.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/output_pred_masks.npy b/models/official/projects/maskformer/losses/tensors/output_pred_masks.npy new file mode 100644 index 00000000..b1c11dd3 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/output_pred_masks.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_labels_0.npy b/models/official/projects/maskformer/losses/tensors/targets_labels_0.npy new file mode 100644 index 00000000..f865d192 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_labels_0.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_labels_1.npy b/models/official/projects/maskformer/losses/tensors/targets_labels_1.npy new file mode 100644 index 00000000..af918e3f Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_labels_1.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_masks_0.npy b/models/official/projects/maskformer/losses/tensors/targets_masks_0.npy new file mode 100644 index 00000000..d82390d2 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_masks_0.npy differ diff --git a/models/official/projects/maskformer/losses/tensors/targets_masks_1.npy b/models/official/projects/maskformer/losses/tensors/targets_masks_1.npy new file mode 100644 index 00000000..75a78279 Binary files /dev/null and b/models/official/projects/maskformer/losses/tensors/targets_masks_1.npy differ diff --git 
a/models/official/projects/maskformer/modeling/.ipynb_checkpoints/maskformer-checkpoint.py b/models/official/projects/maskformer/modeling/.ipynb_checkpoints/maskformer-checkpoint.py new file mode 100644 index 00000000..a1f659c1 --- /dev/null +++ b/models/official/projects/maskformer/modeling/.ipynb_checkpoints/maskformer-checkpoint.py @@ -0,0 +1,113 @@ +import tensorflow as tf + +from official.vision.modeling.backbones import resnet +from official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer +from official.projects.maskformer.modeling.layers.nn_block import MLPHead +from official.projects.maskformer.modeling.decoder.transformer_pixel_decoder import TransformerFPN + +# TODO(ibrahim): Add all model parameters and remove hardcoding. +class MaskFormer(tf.keras.Model): + """MaskFormer.""" + def __init__(self, + input_specs, + fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + num_queries=100, + hidden_size=256, + num_encoder_layers=0, + num_decoder_layers=6, + dropout_rate=0.1, + backbone_endpoint_name='5', + num_classes=133, + batch_size=1, + **kwargs): + self._input_specs = input_specs + self._batch_size = batch_size + self._num_classes = num_classes + + # Pixel Decoder parameters. + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + + # DETRTransformer parameters.
+ self._num_encoder_layers = num_encoder_layers + self._num_decoder_layers = num_decoder_layers + self._num_queries = num_queries + self._hidden_size = hidden_size + self._dropout_rate = dropout_rate + self._backbone_endpoint = backbone_endpoint_name + + + super(MaskFormer, self).__init__(**kwargs) + + def build(self, image_shape): + #backbone + print("[Build MaskFormer] image shape: ", image_shape) + + self.backbone = resnet.ResNet(50, input_specs=self._input_specs, bn_trainable=False) + #decoders + self.pixel_decoder = TransformerFPN(batch_size = self._batch_size, + fpn_feat_dims=self._fpn_feat_dims, + data_format=self._data_format, + dilation_rate=self._dilation_rate, + groups=self._groups, + activation=self._activation, + use_bias=self._use_bias, + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint) + self.transformer = MaskFormerTransformer(backbone_endpoint_name=self._backbone_endpoint, + batch_size=self._batch_size, + num_queries=self._num_queries, + hidden_size=self._hidden_size, + num_encoder_layers=self._num_encoder_layers, + num_decoder_layers=self._num_decoder_layers, + dropout_rate=self._dropout_rate) + self.head = MLPHead(num_classes=self._num_classes, + hidden_dim=self._hidden_size, + mask_dim=self._fpn_feat_dims) + + super(MaskFormer, self).build(image_shape) + + def process_feature_maps(self, maps): + new_dict = {} + for k in maps.keys(): + new_dict[k[0]] = maps[k] + return new_dict + + def call(self, image): + # image = tf.reshape(image, [1, 800, 1135, 3]) + # image = tf.ones((1, 640, 640, 3)) + backbone_feature_maps = self.backbone(image) + mask_features, transformer_enc_feat = self.pixel_decoder(self.process_feature_maps(backbone_feature_maps)) + transformer_features = self.transformer({"features": transformer_enc_feat}) + seg_pred = self.head({"per_pixel_embeddings" : mask_features, + "per_segment_embeddings": transformer_features}) + + return seg_pred diff --git a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-38.pyc b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..5bed6795 Binary files /dev/null and b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc index beb927fc..b0dc2b14 100644 Binary files a/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-38.pyc b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-38.pyc new file mode 100644 index 00000000..dc0cfe7d Binary files /dev/null and b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-38.pyc differ diff --git a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc index dbb2923a..2c8d9a59 100644 Binary files 
a/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc and b/models/official/projects/maskformer/modeling/__pycache__/maskformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/backbone/backbone_test.py b/models/official/projects/maskformer/modeling/backbone/backbone_test.py index fdf65e8b..c07f6834 100644 --- a/models/official/projects/maskformer/modeling/backbone/backbone_test.py +++ b/models/official/projects/maskformer/modeling/backbone/backbone_test.py @@ -1,5 +1,4 @@ from absl.testing import parameterized -import math import tensorflow as tf from official.vision.modeling.backbones import resnet @@ -7,35 +6,26 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): @parameterized.parameters( - (1, 640, 640, 50), (1, 608, 911, 50) + (640, 50), ) - def test_network_creation(self, batch_size, width, height, model_id): + def test_network_creation(self, input_size, model_id): tf.keras.backend.set_image_data_format('channels_last') network = resnet.ResNet(model_id=model_id) self.assertEqual(network.count_params(), 23561152) - inputs = tf.keras.Input(shape=(width, height, 3), batch_size=1) + inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) endpoints = network(inputs) - for x in endpoints.values(): - print(x.shape) - - self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**2)), int(math.ceil(height / 2**2)), 256] - , endpoints['2'].shape.as_list(), "failure on 2") - self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**3)), int(math.ceil(height / 2**3)), 512] - , endpoints['3'].shape.as_list(), "failure on 3") - + [1, 80, 80, 512] + , endpoints['3'].shape.as_list(), "failure on 3") self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**4)), int(math.ceil(height / 2**4)), 1024] - , endpoints['4'].shape.as_list(), "failure on 4") - + [1, 40, 40, 1024] + , endpoints['4'].shape.as_list(), "failure on 4") self.assertAllEqual( - [batch_size, int(math.ceil(width / 2**5)), int(math.ceil(height / 2**5)), 2048] - , endpoints['5'].shape.as_list(), "failure on 5") + [1, 20, 20, 2048] + , endpoints['5'].shape.as_list(), "failure on 5") if __name__ == '__main__': tf.test.main() \ No newline at end of file diff --git a/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_decoder-checkpoint.py b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_decoder-checkpoint.py new file mode 100644 index 00000000..be1ac1ed --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_decoder-checkpoint.py @@ -0,0 +1,93 @@ +import math +import tensorflow as tf + +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling import transformer +from official.modeling import tf_utils +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + +class MaskFormerTransformer(tf.keras.layers.Layer): + def __init__(self, + backbone_endpoint_name, + batch_size, + num_queries, + hidden_size, + num_encoder_layers=6, + num_decoder_layers=6, + dropout_rate=0.1, + **kwargs): + super().__init__(**kwargs) + + self._backbone_endpoint_name = backbone_endpoint_name + + # Embeddings parameters. + self._batch_size = batch_size + self._num_queries = num_queries + self._hidden_size = hidden_size + if hidden_size % 2 != 0: + raise ValueError("hidden_size must be a multiple of 2.") + + + # DETRTransformer parameters. 
+ self._num_encoder_layers = num_encoder_layers + self._num_decoder_layers = num_decoder_layers + self._dropout_rate = dropout_rate + + + + def build(self, input_shape): + self._transformer = DETRTransformer(num_encoder_layers=self._num_encoder_layers, + num_decoder_layers=self._num_decoder_layers, + dropout_rate=self._dropout_rate) + + self._query_embeddings = self.add_weight( + "detr/query_embeddings", + shape=[self._num_queries, self._hidden_size], + initializer=tf.keras.initializers.RandomNormal(mean=0., stddev=1.), + dtype=tf.float32) + + sqrt_k = math.sqrt(1.0 / self._hidden_size) + + self._input_proj = tf.keras.layers.Conv2D( + self._hidden_size, 1, name="detr/conv2d") + + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: + """Generates image mask from input image.""" + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) + return mask + + def call(self, inputs): + features = inputs['features'] + + mask = self._generate_image_mask(features) + + pos_embed = position_embedding_sine( + mask, num_pos_features=self._hidden_size) + pos_embed = tf.reshape(pos_embed, [self._batch_size, -1, self._hidden_size]) + + features = tf.reshape( + self._input_proj(features), [self._batch_size, -1, self._hidden_size]) + + decoded_list = self._transformer({ + "inputs": + features, + "targets": + tf.tile( + tf.expand_dims(self._query_embeddings, axis=0), + (self._batch_size, 1, 1)), + "pos_embed": pos_embed, + "mask": None, + }) + + return decoded_list + + def get_config(self): + return { + "backbone_endpoint_name": self._backbone_endpoint_name, + "num_queries": self._num_queries, + "hidden_size": self._hidden_size, + "num_encoder_layers": self._num_encoder_layers, + "num_decoder_layers": self._num_decoder_layers, + "dropout_rate": self._dropout_rate, + } diff --git a/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_pixel_decoder-checkpoint.py b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_pixel_decoder-checkpoint.py new file mode 100644 index 00000000..60f32abb --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/.ipynb_checkpoints/transformer_pixel_decoder-checkpoint.py @@ -0,0 +1,197 @@ +import tensorflow as tf +import tensorflow_addons as tfa +from official.vision.ops.spatial_transform_ops import nearest_upsampling +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling.transformer import TransformerEncoder +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + +class TransformerFPN(tf.keras.layers.Layer): + """MaskFormer Feature Pyramid Networks.""" + + def __init__(self, + batch_size = 16, + fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs): + """FPN initialization function. + Args: + fpn_feat_dims: `int`, Feature dimension of the fpn. 
+ + TODO: fill in new args + + """ + super(TransformerFPN, self).__init__(**kwargs) + + self._batch_size = batch_size + + # conv2d params + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + + + if tf.keras.backend.image_data_format() == 'channels_last': + # format: (batch_size, height, width, channels) + self._channels_last = True + else: + # format: (batch_size, channels, width, height) + self._channels_last = False + + def build(self, multilevel_features): + conv_args = { + "data_format": self._data_format, + "dilation_rate": self._dilation_rate, + "groups": self._groups, + "activation": None, + "use_bias": self._use_bias, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "activity_regularizer": self._activity_regularizer, + "kernel_constraint": self._kernel_constraint, + "bias_constraint": self._bias_constraint + } + + input_levels = list(multilevel_features.keys()) + levels = input_levels[:-1] + + self._input_proj = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"input_proj", + use_bias = True) + self._transformer_encoder = TransformerEncoder(norm_first=False, + dropout_rate = .1, + num_layers=6) + self._interpolations = [] + self._conv2d_op_lateral = [] + self._lateral_groupnorm = [] + for level in levels[::-1]: + lateral = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"lateral_{level}", + **conv_args) + lateral_norm = tf.keras.layers.GroupNormalization(name = f"lateral_norm_{level}") + interpolate = tf.keras.layers.Resizing( + multilevel_features[level][1], multilevel_features[level][2], interpolation = "nearest") + + self._conv2d_op_lateral.append(lateral) + self._lateral_groupnorm.append(lateral_norm) + self._interpolations.append(interpolate) + + self._conv2d_op_down = [] + self._down_groupnorm = [] + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = "down_initial_conv", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = "down_initial_norm") + self._down_groupnorm.append(down_norm) + self._conv2d_op_down.append(down) + + for level in levels[::-1]: + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = f"down_{level}", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = f"down_norm_{level}") + self._conv2d_op_down.append(down) + self._down_groupnorm.append(down_norm) + + self._conv2d_op_mask = tf.keras.layers.Conv2D( + filters=self._fpn_feat_dims, + kernel_size=(3, 3), + padding='same', + name = "mask_proj", + **conv_args) + + self._relu1 = tf.keras.layers.ReLU() + self._relu2 = tf.keras.layers.ReLU() + + if not self._channels_last: + self._permute1 = tf.keras.layers.Permute((2, 3, 1)) + self._permute2 = tf.keras.layers.Permute((2, 3, 1)) + + super(TransformerFPN, 
self).build(multilevel_features) + + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: + """Generates image mask from input image.""" + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) + return mask + + def call(self, multilevel_features): + """Returns the FPN features for a given multilevel features. + Args: + multilevel_features: a `dict` containing `int` keys for continuous feature + levels, e.g., [2, 3, 4, 5]. The values are corresponding features with + shape [batch_size, height_l, width_l, num_filters]. + Returns: + Mask projection + """ + input_levels = list(multilevel_features.keys()) + + feat = multilevel_features[input_levels[-1]] + + if not self._channels_last: + feat = self._permute_1(feat) + + mask = self._generate_image_mask(feat) + pos_embed = position_embedding_sine( + mask, num_pos_features=self._fpn_feat_dims) + + features = self._input_proj(feat) + + transformer = self._transformer_encoder(features, None, pos_embed) + + + down = self._conv2d_op_down[0](transformer) + down = self._down_groupnorm[0](down) + down = self._relu1(down) + + levels = input_levels[:-1] + for i, level in enumerate(levels[::-1]): + feat = multilevel_features[level] + + if not self._channels_last: + feat = self._permute_2(multilevel_features[level]) + + lateral = self._conv2d_op_lateral[i](feat) + lateral = self._lateral_groupnorm[i](lateral) + + down = self._interpolations[i](down) + lateral + + down = self._conv2d_op_down[i + 1](down) + down = self._down_groupnorm[i+1](down) + down = self._relu2(down) + + mask = self._conv2d_op_mask(down) + + return mask, transformer \ No newline at end of file diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc index 8c1dfc2e..c1bea057 100644 Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc new file mode 100644 index 00000000..1e3e4ae3 Binary files /dev/null and b/models/official/projects/maskformer/modeling/decoder/__pycache__/detr_transformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc index 7ac47c71..f34aa7a9 100644 Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/pixel_decoder.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc index 43e3e156..07c69390 100644 Binary files a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc and b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_decoder.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc 
b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc new file mode 100644 index 00000000..2c6fb9c2 Binary files /dev/null and b/models/official/projects/maskformer/modeling/decoder/__pycache__/transformer_pixel_decoder.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/decoder/detr_transformer.py b/models/official/projects/maskformer/modeling/decoder/detr_transformer.py new file mode 100644 index 00000000..8b3f0d50 --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/detr_transformer.py @@ -0,0 +1,83 @@ +import math +import tensorflow as tf + +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling import transformer +from official.modeling import tf_utils + +class DETRTransformer(tf.keras.layers.Layer): + """Encoder and Decoder of DETR.""" + + def __init__(self, num_encoder_layers=6, num_decoder_layers=6, + dropout_rate=0.1, **kwargs): + super().__init__(**kwargs) + self._dropout_rate = dropout_rate + self._num_encoder_layers = num_encoder_layers + self._num_decoder_layers = num_decoder_layers + + def build(self, input_shape=None): + if self._num_encoder_layers > 0: + self._encoder = transformer.TransformerEncoder( + attention_dropout_rate=self._dropout_rate, + dropout_rate=self._dropout_rate, + intermediate_dropout=self._dropout_rate, + norm_first=False, + num_layers=self._num_encoder_layers) + else: + self._encoder = None + + self._decoder = transformer.TransformerDecoder( + attention_dropout_rate=self._dropout_rate, + dropout_rate=self._dropout_rate, + intermediate_dropout=self._dropout_rate, + norm_first=False, + num_layers=self._num_decoder_layers) + super().build(input_shape) + + def get_config(self): + return { + "num_encoder_layers": self._num_encoder_layers, + "num_decoder_layers": self._num_decoder_layers, + "dropout_rate": self._dropout_rate, + } + + def call(self, inputs): + sources = inputs["inputs"] + targets = inputs["targets"] + pos_embed = inputs["pos_embed"] + mask = inputs["mask"] + input_shape = tf_utils.get_shape_list(sources) + if mask is not None: + source_attention_mask = tf.tile( + tf.expand_dims(mask, axis=1), [1, input_shape[1], 1]) + else: + source_attention_mask = None + if self._encoder is not None: + memory = self._encoder( + sources, attention_mask=source_attention_mask, pos_embed=pos_embed) + else: + memory = sources + + target_shape = tf_utils.get_shape_list(targets) + target_shape = tf.shape(targets) + + if mask is not None: + cross_attention_mask = tf.tile( + tf.expand_dims(mask, axis=1), [1, target_shape[1], 1]) + self_attention_mask=tf.ones( + (target_shape[0], target_shape[1], target_shape[1])) + else: + cross_attention_mask = None + self_attention_mask = None + + decoded = self._decoder( + tf.zeros_like(targets), + memory, + # TODO(b/199545430): self_attention_mask could be set to None when this + # bug is resolved. Passing ones for now. 
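+        # Note: the decoder input starts from zeros; the learned query embeddings are
+        # injected via `input_pos_embed` below, following the DETR formulation.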
+ self_attention_mask=self_attention_mask, + cross_attention_mask=cross_attention_mask, + return_all_decoder_outputs=False, + input_pos_embed=targets, + memory_pos_embed=pos_embed) + return decoded \ No newline at end of file diff --git a/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py b/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py index 93d1806b..fb9584b5 100644 --- a/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py +++ b/models/official/projects/maskformer/modeling/decoder/pixel_decoder.py @@ -1,66 +1,118 @@ +"""Feature Pyramid Networks used in MaskFormer.""" import tensorflow as tf import tensorflow_addons as tfa from official.vision.ops.spatial_transform_ops import nearest_upsampling class Fpn(tf.keras.layers.Layer): - """Feature pyramid networks.""" + """MaskFormer Feature Pyramid Networks.""" def __init__(self, fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, **kwargs): """FPN initialization function. - Args: - fpn_feat_dims: Feature dimension of the fpn + fpn_feat_dims: `int`, Feature dimension of the fpn. + + TODO: fill in new args + """ super(Fpn, self).__init__(**kwargs) + # conv2d params self._fpn_feat_dims = fpn_feat_dims - # TODO(Isaac): Add Conv2D parameteres to constructor. - # TODO(Isaac): Add GroupNormalization parameters to constructor. + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + if tf.keras.backend.image_data_format() == 'channels_last': + # format: (batch_size, height, width, channels) self._channels_last = True else: + # format: (batch_size, channels, width, height) self._channels_last = False def build(self, multilevel_features): - # TODO(Isaac): Add Conv2D parameters to layers. - # TODO(Isaac): Add GroupNormalization parameters to layers. 
- + conv_args = { + "data_format": self._data_format, + "dilation_rate": self._dilation_rate, + "groups": self._groups, + "activation": self._activation, + "use_bias": self._use_bias, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "activity_regularizer": self._activity_regularizer, + "kernel_constraint": self._kernel_constraint, + "bias_constraint": self._bias_constraint + } + input_levels = list(multilevel_features.keys()) levels = input_levels[:-1] self._conv2d_op_lateral = [] - for _ in levels[::-1]: - lateral = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - kernel_size=(1, 1), - padding='same') + self._lateral_groupnorm = [] + for level in levels[::-1]: + lateral = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"lateral_{level}", + **conv_args) + lateral_norm = tf.keras.layers.GroupNormalization(name = f"lateral_norm_{level}") self._conv2d_op_lateral.append(lateral) + self._lateral_groupnorm.append(lateral_norm) self._conv2d_op_down = [] - down = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - strides=(1, 1), - kernel_size=(3, 3), - padding='same') + self._down_groupnorm = [] + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = "down_initial_conv", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = "down_initial_norm") + self._down_groupnorm.append(down_norm) self._conv2d_op_down.append(down) - for _ in levels[::-1]: - down = tf.keras.layers.Conv2D( - filters=self._fpn_feat_dims, - strides=(1, 1), - kernel_size=(3, 3), - padding='same') + + for level in levels[::-1]: + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = f"down_{level}", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = f"down_norm_{level}") self._conv2d_op_down.append(down) + self._down_groupnorm.append(down_norm) self._conv2d_op_mask = tf.keras.layers.Conv2D( filters=self._fpn_feat_dims, kernel_size=(3, 3), - padding='same') - - self._group_norm1 = tfa.layers.GroupNormalization() - self._group_norm2 = tfa.layers.GroupNormalization() + padding='same', + name = "mask_proj", + **conv_args) self._relu1 = tf.keras.layers.ReLU() self._relu2 = tf.keras.layers.ReLU() @@ -88,7 +140,7 @@ def call(self, multilevel_features): feat = self._permute_1(feat) down = self._conv2d_op_down[0](feat) - down = self._group_norm1(down) + down = self._down_groupnorm[0](down) down = self._relu1(down) levels = input_levels[:-1] @@ -99,17 +151,15 @@ def call(self, multilevel_features): feat = self._permute_2(multilevel_features[level]) lateral = self._conv2d_op_lateral[i](feat) - - upsample = nearest_upsampling(down, 2) - - # When width or height is odd there is a shape mismatch with scale=2. 
- if (upsample.shape != lateral.shape): - upsample = upsample[:,:lateral.shape[1],:lateral.shape[2],:] - - down = upsample + lateral + lateral = self._lateral_groupnorm[i](lateral) + down = nearest_upsampling(down, 2) + lateral down = self._conv2d_op_down[i + 1](down) - down = self._group_norm2(down) + down = self._down_groupnorm[i+1](down) down = self._relu2(down) mask = self._conv2d_op_mask(down) diff --git a/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py b/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py index 470f881b..14461e25 100644 --- a/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py +++ b/models/official/projects/maskformer/modeling/decoder/pixel_decoder_test.py @@ -18,55 +18,47 @@ class FpnTest(parameterized.TestCase, tf.test.TestCase): - @parameterized.named_parameters(('test1', "coco_stuff", 256), ('test2', "coco_panoptic", 256)) - def test_pass_through(self, testcase_input_name, dim): - - testcase_backbone_inputs = { - "coco_stuff": { - "2": tf.ones([1, 160, 160, 256]), - "3": tf.ones([1, 80, 80, 512]), - "4": tf.ones([1, 40, 40, 1024]), - "5": tf.ones([1, 20, 20, 2048]) - }, - "coco_panoptic": { - "2": tf.ones([1, 152, 228, 256]), - "3": tf.ones([1, 76, 114, 512]), - "4": tf.ones([1, 38, 57, 1024]), - "5": tf.ones([1, 19, 29, 2048]) - } + @parameterized.named_parameters(('test1', 256),) + def test_pass_through(self, dim): + + multilevel_features = { + "2": tf.ones([1, 160, 160, 256]), + "3": tf.ones([1, 80, 80, 512]), + "4": tf.ones([1, 40, 40, 1024]), + "5": tf.ones([1, 20, 20, 2048]) } # TODO(Isaac): Add the additional parameters. decoder = Fpn(fpn_feat_dims=dim) - output_mask = decoder(testcase_backbone_inputs[testcase_input_name]) + output_mask = decoder(multilevel_features) - expected_output_mask = testcase_backbone_inputs[testcase_input_name]["2"].shape.as_list() + expected_output_mask = multilevel_features["2"].shape.as_list() self.assertAllEqual(output_mask.shape.as_list(), expected_output_mask) - # @combinations.generate( - # combinations.combine( - # strategy=[ - # strategy_combinations.cloud_tpu_strategy, - # strategy_combinations.one_device_strategy_gpu, - # ], - # use_sync_bn=[False, True], - # )) - # def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): - # """Test for sync bn on TPU and GPU devices.""" - - # tf.keras.backend.set_image_data_format('channels_last') - - # with strategy.scope(): - - # multilevel_features = { - # 2: tf.ones([1, 160, 160, 256]), - # 3: tf.ones([1, 80, 80, 512]), - # 4: tf.ones([1, 40, 40, 1024]), - # 5: tf.ones([1, 20, 20, 2048])} - - # decoder = Fpn() - # _ = decoder(multilevel_features) + @combinations.generate( + combinations.combine( + strategy=[ + strategy_combinations.cloud_tpu_strategy, + strategy_combinations.one_device_strategy_gpu, + ], + use_sync_bn=[False, True], + )) + def test_sync_bn_multiple_devices(self, strategy, use_sync_bn): + """Test for sync bn on TPU and GPU devices.""" + + tf.keras.backend.set_image_data_format('channels_last') + + with strategy.scope(): + + multilevel_features = { + 2: tf.ones([1, 160, 160, 256]), + 3: tf.ones([1, 80, 80, 512]), + 4: tf.ones([1, 40, 40, 1024]), + 5: tf.ones([1, 20, 20, 2048])} + + decoder = Fpn() + _ = decoder(multilevel_features) if __name__ == '__main__': diff --git a/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py
b/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py index b882e9a2..721aee39 100644 --- a/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py +++ b/models/official/projects/maskformer/modeling/decoder/transformer_decoder.py @@ -1,16 +1,17 @@ import math import tensorflow as tf -from official.projects.detr.modeling.detr import DETRTransformer, position_embedding_sine - +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling import transformer +from official.modeling import tf_utils +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + class MaskFormerTransformer(tf.keras.layers.Layer): def __init__(self, backbone_endpoint_name, - batch_size, num_queries, hidden_size, - num_classes, - num_encoder_layers=6, + num_encoder_layers=0, num_decoder_layers=6, dropout_rate=0.1, **kwargs): @@ -19,13 +20,11 @@ def __init__(self, self._backbone_endpoint_name = backbone_endpoint_name # Embeddings parameters. - self._batch_size = batch_size self._num_queries = num_queries self._hidden_size = hidden_size if hidden_size % 2 != 0: raise ValueError("hidden_size must be a multiple of 2.") - self._num_classes = num_classes # DETRTransformer parameters. self._num_encoder_layers = num_encoder_layers @@ -46,39 +45,28 @@ def build(self, input_shape): dtype=tf.float32) sqrt_k = math.sqrt(1.0 / self._hidden_size) - - # TODO(ibrahim): NOT USED, remove num classes parameters as well. - self._class_embed = tf.keras.layers.Dense( - self._num_classes, - kernel_initializer=tf.keras.initializers.RandomUniform(-sqrt_k, sqrt_k), - name="detr/cls_dense") self._input_proj = tf.keras.layers.Conv2D( self._hidden_size, 1, name="detr/conv2d") - def _generate_image_mask(self, inputs: tf.Tensor, - target_shape: tf.Tensor) -> tf.Tensor: + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: """Generates image mask from input image.""" - mask = tf.expand_dims( - tf.cast(tf.not_equal(tf.reduce_sum(inputs, axis=-1), 0), inputs.dtype), - axis=-1) - mask = tf.image.resize( - mask, target_shape, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) return mask def call(self, inputs): - input_image = inputs['image'] - features = inputs['features'][self._backbone_endpoint_name] + features = inputs['features'] + batch_size = features.shape[0] - mask = self._generate_image_mask(input_image, tf.shape(features)[1: 3]) + mask = self._generate_image_mask(features) pos_embed = position_embedding_sine( - mask[:, :, :, 0], num_pos_features=self._hidden_size) - pos_embed = tf.reshape(pos_embed, [self._batch_size, -1, self._hidden_size]) + mask, num_pos_features=self._hidden_size) + pos_embed = tf.reshape(pos_embed, [batch_size, -1, self._hidden_size]) features = tf.reshape( - self._input_proj(features), [self._batch_size, -1, self._hidden_size]) - mask = tf.reshape(mask, [self._batch_size, -1]) + self._input_proj(features), [batch_size, -1, self._hidden_size]) decoded_list = self._transformer({ "inputs": @@ -86,9 +74,9 @@ def call(self, inputs): "targets": tf.tile( tf.expand_dims(self._query_embeddings, axis=0), - (self._batch_size, 1, 1)), + (batch_size, 1, 1)), "pos_embed": pos_embed, - "mask": mask, + "mask": None, }) return decoded_list @@ -98,8 +86,7 @@ def get_config(self): "backbone_endpoint_name": self._backbone_endpoint_name, "num_queries": self._num_queries, "hidden_size": 
self._hidden_size, - "num_classes": self._num_classes, "num_encoder_layers": self._num_encoder_layers, "num_decoder_layers": self._num_decoder_layers, "dropout_rate": self._dropout_rate, - } \ No newline at end of file + } diff --git a/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py b/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py index 61770478..dcdcce6b 100644 --- a/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py +++ b/models/official/projects/maskformer/modeling/decoder/transformer_decoder_test.py @@ -4,18 +4,22 @@ # from transformer import MaskFormerTransformer from official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer - class MaskFormerTransformerTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("test1", "coco_stuff", "5", 8, 100, 256, 171,), - ("test2", "coco_panoptic", "5", 1, 100, 256, 133,)) + @parameterized.named_parameters(('test1', '5', 8, 100, 256, 10,)) def test_pass_through(self, - testcase_input_name, - backbone_endpoint_name, - batch_size, - num_queries, - hidden_size, - num_classes): + backbone_endpoint_name, + batch_size, + num_queries, + hidden_size, + num_classes): + + multilevel_features = { + "2": tf.ones([1, 160, 160, 256]), + "3": tf.ones([1, 80, 80, 512]), + "4": tf.ones([1, 40, 40, 1024]), + "5": tf.ones([1, 20, 20, 2048]) + } transformer = MaskFormerTransformer(backbone_endpoint_name=backbone_endpoint_name, batch_size=batch_size, @@ -26,34 +30,13 @@ def test_pass_through(self, num_decoder_layers=6, dropout_rate=0.1) - testcase_input_image = { - "coco_stuff": tf.ones((1, 640, 640, 3)), - "coco_panoptic": tf.ones((1, 608, 911, 3)), - } - - testcase_backbone_inputs = { - "coco_stuff": { - "2": tf.ones([1, 160, 160, 256]), - "3": tf.ones([1, 80, 80, 512]), - "4": tf.ones([1, 40, 40, 1024]), - "5": tf.ones([1, 20, 20, 2048]) - }, - "coco_panoptic": { - "2": tf.ones([1, 152, 228, 256]), - "3": tf.ones([1, 76, 114, 512]), - "4": tf.ones([1, 38, 57, 1024]), - "5": tf.ones([1, 19, 29, 2048]) - } - } - - expected_output_shape = [6, batch_size, num_queries, 256] + input_image = tf.ones((1, 640, 640, 3)) + expected_output_shape = [6, 8, 100, 256] - output = transformer( - {"image": testcase_input_image[testcase_input_name], "features": testcase_backbone_inputs[testcase_input_name]}) + output = transformer({"image": input_image, "features": multilevel_features }) output_shape = [len(output)] + output[0].shape.as_list() self.assertAllEqual(output_shape, expected_output_shape) - -if __name__ == "__main__": +if __name__ == '__main__': tf.test.main() diff --git a/models/official/projects/maskformer/modeling/decoder/transformer_pixel_decoder.py b/models/official/projects/maskformer/modeling/decoder/transformer_pixel_decoder.py new file mode 100644 index 00000000..15794033 --- /dev/null +++ b/models/official/projects/maskformer/modeling/decoder/transformer_pixel_decoder.py @@ -0,0 +1,196 @@ +import tensorflow as tf +import tensorflow_addons as tfa +from official.vision.ops.spatial_transform_ops import nearest_upsampling +from official.projects.detr.modeling.detr import position_embedding_sine +from official.projects.detr.modeling.transformer import TransformerEncoder +from official.projects.maskformer.modeling.decoder.detr_transformer import DETRTransformer + +class TransformerFPN(tf.keras.layers.Layer): + """MaskFormer Feature Pyramid Networks.""" + + def __init__(self, + fpn_feat_dims=256, + 
data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + num_encoder_layers = 0, + **kwargs): + """FPN initialization function. + Args: + fpn_feat_dims: `int`, Feature dimension of the fpn. + + TODO: fill in new args + + """ + super(TransformerFPN, self).__init__(**kwargs) + + # conv2d params + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint + self._num_encoder_layers = num_encoder_layers + + + if tf.keras.backend.image_data_format() == 'channels_last': + # format: (batch_size, height, width, channels) + self._channels_last = True + else: + # format: (batch_size, channels, width, height) + self._channels_last = False + + def build(self, multilevel_features): + conv_args = { + "data_format": self._data_format, + "dilation_rate": self._dilation_rate, + "groups": self._groups, + "activation": None, + "use_bias": self._use_bias, + "kernel_initializer": self._kernel_initializer, + "bias_initializer": self._bias_initializer, + "kernel_regularizer": self._kernel_regularizer, + "bias_regularizer": self._bias_regularizer, + "activity_regularizer": self._activity_regularizer, + "kernel_constraint": self._kernel_constraint, + "bias_constraint": self._bias_constraint + } + + input_levels = list(multilevel_features.keys()) + levels = input_levels[:-1] + + self._input_proj = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"input_proj", + use_bias = True) + self._transformer_encoder = TransformerEncoder(norm_first=False, + dropout_rate = .1, + num_layers=self._num_encoder_layers) + self._interpolations = [] + self._conv2d_op_lateral = [] + self._lateral_groupnorm = [] + for level in levels[::-1]: + lateral = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + kernel_size=(1, 1), + padding='same', + name = f"lateral_{level}", + **conv_args) + lateral_norm = tf.keras.layers.GroupNormalization(name = f"lateral_norm_{level}") + interpolate = tf.keras.layers.Resizing( + multilevel_features[level][1], multilevel_features[level][2], interpolation = "nearest") + + self._conv2d_op_lateral.append(lateral) + self._lateral_groupnorm.append(lateral_norm) + self._interpolations.append(interpolate) + + self._conv2d_op_down = [] + self._down_groupnorm = [] + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = "down_initial_conv", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = "down_initial_norm") + self._down_groupnorm.append(down_norm) + self._conv2d_op_down.append(down) + + for level in levels[::-1]: + down = tf.keras.layers.Conv2D(filters=self._fpn_feat_dims, + strides=(1, 1), + kernel_size=(3, 3), + padding='same', + name = f"down_{level}", + **conv_args) + down_norm = tf.keras.layers.GroupNormalization(name = f"down_norm_{level}") + self._conv2d_op_down.append(down) + 
self._down_groupnorm.append(down_norm) + + self._conv2d_op_mask = tf.keras.layers.Conv2D( + filters=self._fpn_feat_dims, + kernel_size=(3, 3), + padding='same', + name = "mask_proj", + **conv_args) + + self._relu1 = tf.keras.layers.ReLU() + self._relu2 = tf.keras.layers.ReLU() + + if not self._channels_last: + self._permute1 = tf.keras.layers.Permute((2, 3, 1)) + self._permute2 = tf.keras.layers.Permute((2, 3, 1)) + + super(TransformerFPN, self).build(multilevel_features) + + def _generate_image_mask(self, features: tf.Tensor) -> tf.Tensor: + """Generates image mask from input image.""" + mask = tf.zeros([features.shape[0],features.shape[1],features.shape[2]]) + mask = tf.cast(mask, dtype = bool) + return mask + + def call(self, multilevel_features): + """Returns the FPN features for a given multilevel features. + Args: + multilevel_features: a `dict` containing `int` keys for continuous feature + levels, e.g., [2, 3, 4, 5]. The values are corresponding features with + shape [batch_size, height_l, width_l, num_filters]. + Returns: + Mask projection + """ + input_levels = list(multilevel_features.keys()) + + feat = multilevel_features[input_levels[-1]] + + if not self._channels_last: + feat = self._permute_1(feat) + + mask = self._generate_image_mask(feat) + pos_embed = position_embedding_sine( + mask, num_pos_features=self._fpn_feat_dims) + + features = self._input_proj(feat) + + transformer = self._transformer_encoder(features, None, pos_embed) + + + down = self._conv2d_op_down[0](transformer) + down = self._down_groupnorm[0](down) + down = self._relu1(down) + + levels = input_levels[:-1] + for i, level in enumerate(levels[::-1]): + feat = multilevel_features[level] + + if not self._channels_last: + feat = self._permute_2(multilevel_features[level]) + + lateral = self._conv2d_op_lateral[i](feat) + lateral = self._lateral_groupnorm[i](lateral) + + down = self._interpolations[i](down) + lateral + + down = self._conv2d_op_down[i + 1](down) + down = self._down_groupnorm[i+1](down) + down = self._relu2(down) + + mask = self._conv2d_op_mask(down) + + return mask, transformer diff --git a/models/official/projects/maskformer/modeling/decoder/trasformer_decoder.pu b/models/official/projects/maskformer/modeling/decoder/trasformer_decoder.pu new file mode 100644 index 00000000..e69de29b diff --git a/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block-checkpoint.py b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block-checkpoint.py new file mode 100644 index 00000000..74cc8027 --- /dev/null +++ b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block-checkpoint.py @@ -0,0 +1,80 @@ +import tensorflow as tf + +''' +Transformer Parameters: + +enc_layers: int, +dec_layers: int, +nheads: int, +dropout: float, +dim_feedforward: int, +pre_norm: bool, +enforce_input_project: bool +''' + + +class MLPHead(tf.keras.layers.Layer): + def __init__(self, + num_classes: int, + hidden_dim: int, + # dec_supervision: bool, + mask_dim: int): + super().__init__() + + self._num_classes = num_classes + self._hidden_dim = hidden_dim + self._mask_dim = mask_dim + + def build(self, input_shape): + self._mlp = MLP(self._hidden_dim, self._hidden_dim, self._mask_dim, 3) + self._linear_classifier = tf.keras.layers.Dense(self._num_classes + 1) + # No Softmax used in their code? Need to figure out!! 
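+        # The Dense layer outputs raw logits over num_classes + 1 (the extra slot is the
+        # "no object" class); softmax is applied later, in the loss and at inference time.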
+ # self.linear_classifier = tf.keras.layers.Dense(input_shape=hidden_dim, out_dim=num_classes + 1, activation=None) + + # self.dec_supervision = dec_supervision + + def call(self, inputs): + per_pixel_embeddings = inputs['per_pixel_embeddings'] + per_segment_embeddings = inputs['per_segment_embeddings'] + + class_prob_prediction = self._linear_classifier(per_segment_embeddings) + mask_embedding = self._mlp(per_segment_embeddings) + mask_prob_prediction = tf.einsum( + "bqc,bhwc->bhwq", mask_embedding, per_pixel_embeddings) + + return {'class_prob_predictions': class_prob_prediction,'mask_prob_predictions': mask_prob_prediction} + + +class MLP(tf.keras.layers.Layer): + def __init__(self, + input_dim: int, + hidden_dim: int, + output_dim: int, + num_layers: int): + super().__init__() + + self._input_dim = input_dim + self._hidden_dim = hidden_dim + self._output_dim = output_dim + self._num_layers = num_layers + + def build(self, input_shape): + layer_dims = [(self._input_dim, self._hidden_dim)] + for _ in range(self._num_layers - 2): + layer_dims.append((self._hidden_dim, self._hidden_dim)) + layer_dims.append((self._hidden_dim, self._output_dim)) + + self._layers = [] + for i, dim in enumerate(layer_dims): + if(i < self._num_layers - 1): + self._layers.append(tf.keras.layers.Dense( + dim[1], activation=tf.nn.relu)) + else: + # Final Layer + self._layers.append( + tf.keras.layers.Dense(dim[1], activation=None)) + + def call(self, x): + for layer in self._layers: + x = layer(x) + return x diff --git a/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block_test-checkpoint.py b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block_test-checkpoint.py new file mode 100644 index 00000000..97090df9 --- /dev/null +++ b/models/official/projects/maskformer/modeling/layers/.ipynb_checkpoints/nn_block_test-checkpoint.py @@ -0,0 +1,29 @@ +from absl.testing import parameterized +import tensorflow as tf + +from official.projects.maskformer.modeling.layers.nn_block import MLPHead + +class MaskFormerTransformerTest(tf.test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters(("test1", 256, 256, 171)) + def test_pass_through(self, + mask_dim, + hidden_size, + num_classes): + + MLP_head = MLPHead( + num_classes=num_classes, hidden_dim=hidden_size, mask_dim=mask_dim) + + inputs = {"per_segment_embeddings": tf.ones((6, 8, 100, 256)), + "per_pixel_embeddings": tf.ones((8, 160, 160, 256))} + + expected_class_probs_shape = [8, 100, 172] + expected_mask_probs_shape = [8, 100, 160, 160] + + output = MLP_head(inputs) + + self.assertAllEqual(output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) + self.assertAllEqual(output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) + +if __name__ == '__main__': + tf.test.main() diff --git a/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc b/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc index 0bcaeb5c..015a4416 100644 Binary files a/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc and b/models/official/projects/maskformer/modeling/layers/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc b/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc index 4eb6c54c..790d7ae5 100644 Binary files 
a/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc and b/models/official/projects/maskformer/modeling/layers/__pycache__/nn_block.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/modeling/layers/nn_block.py b/models/official/projects/maskformer/modeling/layers/nn_block.py index 41bef080..74cc8027 100644 --- a/models/official/projects/maskformer/modeling/layers/nn_block.py +++ b/models/official/projects/maskformer/modeling/layers/nn_block.py @@ -38,11 +38,11 @@ def call(self, inputs): per_segment_embeddings = inputs['per_segment_embeddings'] class_prob_prediction = self._linear_classifier(per_segment_embeddings) - mask_embedding = self._mlp(per_segment_embeddings[-1]) + mask_embedding = self._mlp(per_segment_embeddings) mask_prob_prediction = tf.einsum( "bqc,bhwc->bhwq", mask_embedding, per_pixel_embeddings) - return {'class_prob_predictions': class_prob_prediction[-1],'mask_prob_predictions': mask_prob_prediction} + return {'class_prob_predictions': class_prob_prediction,'mask_prob_predictions': mask_prob_prediction} class MLP(tf.keras.layers.Layer): diff --git a/models/official/projects/maskformer/modeling/layers/nn_block_test.py b/models/official/projects/maskformer/modeling/layers/nn_block_test.py index 5d3e3320..97090df9 100644 --- a/models/official/projects/maskformer/modeling/layers/nn_block_test.py +++ b/models/official/projects/maskformer/modeling/layers/nn_block_test.py @@ -5,38 +5,22 @@ class MaskFormerTransformerTest(tf.test.TestCase, parameterized.TestCase): - @parameterized.named_parameters(("test1", "coco_stuff", 256, 256, 171, 100, 8), ("test2", "coco_panoptic", 256, 256, 133, 100, 1)) + @parameterized.named_parameters(("test1", 256, 256, 171)) def test_pass_through(self, - testcase_input_name, mask_dim, hidden_size, - num_classes, - num_queries, - batch_size): + num_classes): - mlp_head = MLPHead( + MLP_head = MLPHead( num_classes=num_classes, hidden_dim=hidden_size, mask_dim=mask_dim) - testcase_inputs = { - "coco_stuff": { - "per_segment_embeddings": tf.ones((6, 8, 100, 256)), - "per_pixel_embeddings": tf.ones((8, 160, 160, 256)) - }, - "coco_panoptic": { - "per_segment_embeddings": tf.ones((6, 1, 100, 256)), - "per_pixel_embeddings": tf.ones((1, 152, 228, 256)) - } - } - # expected_class_probs_shape = [8, 100, 172] - expected_class_probs_shape = [batch_size, num_queries, num_classes + 1] - - # expected_mask_probs_shape = [8, 100, 160, 160] - expected_mask_probs_shape = [batch_size, - testcase_inputs[testcase_input_name]["per_pixel_embeddings"].shape[1], - testcase_inputs[testcase_input_name]["per_pixel_embeddings"].shape[2], - num_queries] - - output = mlp_head(testcase_inputs[testcase_input_name]) + inputs = {"per_segment_embeddings": tf.ones((6, 8, 100, 256)), + "per_pixel_embeddings": tf.ones((8, 160, 160, 256))} + + expected_class_probs_shape = [8, 100, 172] + expected_mask_probs_shape = [8, 100, 160, 160] + + output = MLP_head(inputs) self.assertAllEqual(output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) self.assertAllEqual(output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) diff --git a/models/official/projects/maskformer/modeling/maskformer.py b/models/official/projects/maskformer/modeling/maskformer.py index efa81667..f0fb1211 100644 --- a/models/official/projects/maskformer/modeling/maskformer.py +++ b/models/official/projects/maskformer/modeling/maskformer.py @@ -2,46 +2,118 @@ from official.vision.modeling.backbones import resnet from 
official.projects.maskformer.modeling.decoder.transformer_decoder import MaskFormerTransformer -from official.projects.maskformer.modeling.decoder.pixel_decoder import Fpn from official.projects.maskformer.modeling.layers.nn_block import MLPHead +from official.projects.maskformer.modeling.decoder.transformer_pixel_decoder import TransformerFPN # TODO(ibrahim): Add all parameters model parameters and remove hardcoding. class MaskFormer(tf.keras.Model): - def __init__(self, num_classes, num_queries, **kwargs): - super().__init__(**kwargs) - self.num_classes = num_classes - self.num_queries = num_queries + """Maskformer""" + def __init__(self, + input_specs, + fpn_feat_dims=256, + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation='relu', + use_bias=False, + kernel_initializer="glorot_uniform", + bias_initializer="zeros", + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + num_queries=100, + hidden_size=256, + fpn_encoder_layers=6, + detr_encoder_layers=0, + num_decoder_layers=6, + dropout_rate=0.1, + backbone_endpoint_name='5', + num_classes=133, + batch_size=1, + **kwargs): + self._input_specs = input_specs + self._batch_size = batch_size + self._num_classes = num_classes - def build(self, input_shape): - self._backbone = resnet.ResNet(50) - self._transformer_decoder = MaskFormerTransformer(backbone_endpoint_name='5', - batch_size=1, - num_queries=self.num_queries, - hidden_size=256, - num_classes=self.num_classes, - num_encoder_layers=0, - num_decoder_layers=6, - dropout_rate=0.1) + # Pixel Deocder paramters. + self._fpn_feat_dims = fpn_feat_dims + self._data_format = data_format + self._dilation_rate = dilation_rate + self._groups = groups + self._activation = activation + self._use_bias = use_bias + self._kernel_initializer = kernel_initializer + self._bias_initializer = bias_initializer + self._kernel_regularizer = kernel_regularizer + self._bias_regularizer = bias_regularizer + self._activity_regularizer = activity_regularizer + self._kernel_constraint = kernel_constraint + self._bias_constraint = bias_constraint - self._pixel_decoder = Fpn(fpn_feat_dims=256) - self._MLP_head = MLPHead( - num_classes=self.num_classes, hidden_dim=256, mask_dim=256) + # DETRTransformer parameters. 
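+    # fpn_encoder_layers configures the TransformerEncoder inside the pixel decoder;
+    # detr_encoder_layers / num_decoder_layers configure the DETR-style transformer head.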
+ self._fpn_encoder_layers = fpn_encoder_layers + self._detr_encoder_layers = detr_encoder_layers + self._num_decoder_layers = num_decoder_layers + self._num_queries = num_queries + self._hidden_size = hidden_size + self._dropout_rate = dropout_rate + self._backbone_endpoint = backbone_endpoint_name + - def call(self, inputs): - feature_maps = self._backbone(inputs) - for i, x in feature_maps.items(): - print(i, " - ", x.shape) + super(MaskFormer, self).__init__(**kwargs) - per_segment_embeddings = self._transformer_decoder( - {"image": inputs, "features": feature_maps}) - print("\n\nper_segment_embeddings:", tf.shape(per_segment_embeddings)) + def build(self, image_shape): + #backbone + print("[Build MaskFormer] image shape: ", image_shape) + self.backbone = resnet.ResNet(50, input_specs=self._input_specs, bn_trainable=False) + #decoders + self.pixel_decoder = TransformerFPN(batch_size = self._batch_size, + fpn_feat_dims=self._fpn_feat_dims, + data_format=self._data_format, + dilation_rate=self._dilation_rate, + groups=self._groups, + activation=self._activation, + use_bias=self._use_bias, + kernel_initializer=self._kernel_initializer, + bias_initializer=self._bias_initializer, + kernel_regularizer=self._kernel_regularizer, + bias_regularizer=self._bias_regularizer, + activity_regularizer=self._activity_regularizer, + kernel_constraint=self._kernel_constraint, + bias_constraint=self._bias_constraint, + num_encoder_layers = self._fpn_encoder_layers) + self.transformer = MaskFormerTransformer(backbone_endpoint_name=self._backbone_endpoint, + batch_size=self._batch_size, + num_queries=self._num_queries, + hidden_size=self._hidden_size, + num_encoder_layers=self._detr_encoder_layers, + num_decoder_layers=self._num_decoder_layers, + dropout_rate=self._dropout_rate) + self.head = MLPHead(num_classes=self._num_classes, + hidden_dim=self._hidden_size, + mask_dim=self._fpn_feat_dims) + + #self.panoptic_interpolate = tf.keras.layers.Resizing( + # image_shape[1], image_shape[2], interpolation = "bilinear") + super(MaskFormer, self).build(image_shape) + + def process_feature_maps(self, maps): + new_dict = {} + for k in maps.keys(): + new_dict[k[0]] = maps[k] + return new_dict - per_pixel_embeddings = self._pixel_decoder(feature_maps) - print("\n\nper_pixel_embeddings:", tf.shape(per_pixel_embeddings)) - - class_and_mask_probs = self._MLP_head( - {'per_pixel_embeddings': per_pixel_embeddings, 'per_segment_embeddings': tf.stack(per_segment_embeddings)}) - - print("\n\nClass:", tf.shape(class_and_mask_probs["class_prob_predictions"])) - print("\n\nMask:", tf.shape(class_and_mask_probs["mask_prob_predictions"])) - return class_and_mask_probs + def call(self, image, training = False): + # image = tf.reshape(image, [1, 800, 1135, 3]) + # image = tf.ones((1, 640, 640, 3)) + backbone_feature_maps = self.backbone(image) + mask_features, transformer_enc_feat = self.pixel_decoder(self.process_feature_maps(backbone_feature_maps)) + transformer_features = self.transformer({"features": transformer_enc_feat}) + + seg_pred = self.head({"per_pixel_embeddings" : mask_features, + "per_segment_embeddings": transformer_features}) + #if not training: + # seg_pred["pred_masks"] = self.panoptic_interpolate(seg_pred["pred_masks"]) + return seg_pred diff --git a/models/official/projects/maskformer/modeling/maskformer_test.py b/models/official/projects/maskformer/modeling/maskformer_test.py index 962df813..41c91d0e 100644 --- a/models/official/projects/maskformer/modeling/maskformer_test.py +++ 
b/models/official/projects/maskformer/modeling/maskformer_test.py @@ -1,42 +1,46 @@ -from official.projects.maskformer.maskformer import MaskFormer +from official.projects.maskformer.modeling.maskformer import MaskFormer from absl.testing import parameterized import tensorflow as tf class MaskFormerTest(tf.test.TestCase, parameterized.TestCase): # TODO(ibrahim): Add more testcases. - @parameterized.named_parameters(('test1', 'coco_stuff', 100, 171), ('test2', 'coco_panoptic', 100, 133)) - def test_pass_through(self, testcase_input_name, num_queries, num_classes): - - model = MaskFormer(num_queries=num_queries, num_classes=num_classes) - - # input_image = tf.ones((1, 640, 640, 3)) - testcase_input = { - "coco_stuff": tf.ones((1, 640, 640, 3)), - "coco_panoptic": tf.ones((1, 608, 911, 3)) - } + @parameterized.named_parameters(('test1', 256, 100, 256, "5", 6, 0, 6, 133, 1)) + def test_pass_through(self, + fpn_feat_dims, + num_queries, + hidden_size, + backbone_endpoint_name, + fpn_encoder_layers, + detr_encoder_layers, + num_decoder_layers, + num_classes, + batch_size): + input_specs = tf.keras.layers.InputSpec(shape=[None] + + [640, 640, 3]) + maskformer = MaskFormer(input_specs= input_specs, hidden_size=hidden_size, + backbone_endpoint_name=backbone_endpoint_name, + fpn_encoder_layers=fpn_encoder_layers, + detr_encoder_layers=detr_encoder_layers, + num_decoder_layers=num_decoder_layers, + num_classes=num_classes, + batch_size=batch_size) + + input_image = tf.ones((1, 640, 640, 3)) - # TODO(ibrahim): Add num_queries and make expected output shape dynamic after adding parameters. # expected_class_probs_shape = [1, 100, 172] # expected_mask_probs_shape = [1, 160, 160, 100] + expected_class_probs_shape = [1, 100, 134] # B, dim of logits, number of classes + expected_mask_probs_shape = [1, 160, 160, 100] # B,H,W,C - testcases_expected_output = { - "coco_stuff": { - "class_prob_predictions": [1, 100, 172], - "mask_prob_predictions": [1, 160, 160, 100] - }, - "coco_panoptic": { - "class_prob_predictions": [1, num_queries, 134], - "mask_prob_predictions": [1, 152, 228, num_queries] - } - } - - output = model(testcase_input[testcase_input_name]) - + output = maskformer(input_image) self.assertAllEqual( - output["class_prob_predictions"].shape.as_list(), testcases_expected_output[testcase_input_name]["class_prob_predictions"]) + output["class_prob_predictions"].shape.as_list(), expected_class_probs_shape) self.assertAllEqual( - output["mask_prob_predictions"].shape.as_list(), testcases_expected_output[testcase_input_name]["mask_prob_predictions"]) + output["mask_prob_predictions"].shape.as_list(), expected_mask_probs_shape) if __name__ == '__main__': tf.test.main() + diff --git a/models/official/projects/maskformer/optimization.py b/models/official/projects/maskformer/optimization.py new file mode 100644 index 00000000..062dac1c --- /dev/null +++ b/models/official/projects/maskformer/optimization.py @@ -0,0 +1,147 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Customized optimizer to match paper results.""" + +import dataclasses +import tensorflow as tf +from official.modeling import optimization +from official.nlp import optimization as nlp_optimization + + +@dataclasses.dataclass +class DETRAdamWConfig(optimization.AdamWeightDecayConfig): + pass + + +@dataclasses.dataclass +class OptimizerConfig(optimization.OptimizerConfig): + detr_adamw: DETRAdamWConfig = DETRAdamWConfig() + + +@dataclasses.dataclass +class OptimizationConfig(optimization.OptimizationConfig): + """Configuration for optimizer and learning rate schedule. + + Attributes: + optimizer: optimizer oneof config. + ema: optional exponential moving average optimizer config, if specified, ema + optimizer will be used. + learning_rate: learning rate oneof config. + warmup: warmup oneof config. + """ + optimizer: OptimizerConfig = OptimizerConfig() + + +# TODO(frederickliu): figure out how to make this configuable. +# TODO(frederickliu): Study if this is needed. +class _DETRAdamW(nlp_optimization.AdamWeightDecay): + """Custom AdamW to support different lr scaling for backbone. + + The code is copied from AdamWeightDecay and Adam with learning scaling. + """ + + def _resource_apply_dense(self, grad, var, apply_state=None): + lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state) + apply_state = kwargs['apply_state'] + if 'detr' not in var.name: + lr_t *= 0.1 + decay = self._decay_weights_op(var, lr_t, apply_state) + with tf.control_dependencies([decay]): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = ((apply_state or {}).get((var_device, var_dtype)) + or self._fallback_apply_state(var_device, var_dtype)) + + m = self.get_slot(var, 'm') + v = self.get_slot(var, 'v') + lr = coefficients[ + 'lr_t'] * 0.1 if 'detr' not in var.name else coefficients['lr_t'] + + if not self.amsgrad: + return tf.raw_ops.ResourceApplyAdam( + var=var.handle, + m=m.handle, + v=v.handle, + beta1_power=coefficients['beta_1_power'], + beta2_power=coefficients['beta_2_power'], + lr=lr, + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, + use_locking=self._use_locking) + else: + vhat = self.get_slot(var, 'vhat') + return tf.raw_ops.ResourceApplyAdamWithAmsgrad( + var=var.handle, + m=m.handle, + v=v.handle, + vhat=vhat.handle, + beta1_power=coefficients['beta_1_power'], + beta2_power=coefficients['beta_2_power'], + lr=lr, + beta1=coefficients['beta_1_t'], + beta2=coefficients['beta_2_t'], + epsilon=coefficients['epsilon'], + grad=grad, + use_locking=self._use_locking) + + def _resource_apply_sparse(self, grad, var, indices, apply_state=None): + lr_t, kwargs = self._get_lr(var.device, var.dtype.base_dtype, apply_state) + apply_state = kwargs['apply_state'] + if 'detr' not in var.name: + lr_t *= 0.1 + decay = self._decay_weights_op(var, lr_t, apply_state) + with tf.control_dependencies([decay]): + var_device, var_dtype = var.device, var.dtype.base_dtype + coefficients = ((apply_state or {}).get((var_device, var_dtype)) + or self._fallback_apply_state(var_device, var_dtype)) + + # m_t = beta1 * m + (1 - beta1) * g_t + m = self.get_slot(var, 'm') + m_scaled_g_values = grad * coefficients['one_minus_beta_1_t'] + m_t = tf.compat.v1.assign(m, m * coefficients['beta_1_t'], + use_locking=self._use_locking) + with tf.control_dependencies([m_t]): + m_t = self._resource_scatter_add(m, indices, m_scaled_g_values) 
+ + # v_t = beta2 * v + (1 - beta2) * (g_t * g_t) + v = self.get_slot(var, 'v') + v_scaled_g_values = (grad * grad) * coefficients['one_minus_beta_2_t'] + v_t = tf.compat.v1.assign(v, v * coefficients['beta_2_t'], + use_locking=self._use_locking) + with tf.control_dependencies([v_t]): + v_t = self._resource_scatter_add(v, indices, v_scaled_g_values) + lr = coefficients[ + 'lr_t'] * 0.1 if 'detr' not in var.name else coefficients['lr_t'] + if not self.amsgrad: + v_sqrt = tf.sqrt(v_t) + var_update = tf.compat.v1.assign_sub( + var, lr * m_t / (v_sqrt + coefficients['epsilon']), + use_locking=self._use_locking) + return tf.group(*[var_update, m_t, v_t]) + else: + v_hat = self.get_slot(var, 'vhat') + v_hat_t = tf.maximum(v_hat, v_t) + with tf.control_dependencies([v_hat_t]): + v_hat_t = tf.compat.v1.assign( + v_hat, v_hat_t, use_locking=self._use_locking) + v_hat_sqrt = tf.sqrt(v_hat_t) + var_update = tf.compat.v1.assign_sub( + var, + lr* m_t / (v_hat_sqrt + coefficients['epsilon']), + use_locking=self._use_locking) + return tf.group(*[var_update, m_t, v_t, v_hat_t]) + +optimization.register_optimizer_cls('detr_adamw', _DETRAdamW) diff --git a/models/official/projects/maskformer/readme.md b/models/official/projects/maskformer/readme.md index 0e305431..d1dbc5cc 100644 --- a/models/official/projects/maskformer/readme.md +++ b/models/official/projects/maskformer/readme.md @@ -1,30 +1,21 @@ -# MaskFormer: Per-Pixel Classification is Not All You Need for Semantic Segmentation -TensorFlow 2 implementation of MaskFormer: Per-Pixel Classification is Not All You Need for Semantic Segmentation - - -export PYTHONPATH=$PYTHONPATH:/depot/qqiu/data/vishal/projects/tf_maskformer_debug/models +``` +module load cuda/11.7.0 cudnn/cuda-11.7_8.6 gcc/6.3.0 +export PYTHONPATH=$PYTHONPATH: +``` -## Imp paths -code path - /depot/qqiu/data/vishal/projects/tf_maskformer_debug/models/official/projects/maskformer/ckpts -data path - /depot/davisjam/data/vishal/datasets/coco ## Environment creation +``` conda create -n tfmaskformer -conda activate /depot/qqiu/data/vishal/envs/tmaskformer -pip install -r requirements.txt - -## Dataset Download and Prep +conda activate tfmaskformer +pip install -r /models/official/requirements.txt +pip install tensorflow-text-nightly ``` -chmod +x ./data/create_tf_records.sh -cd /depot/qqiu/data/vishal/projects/tf_maskformer_integration/official/projects/maskformer/data -./create_tf_records.sh /depot/davisjam/data/vishal/datasets/coco +## To start training ``` -module load cuda/11.7.0 cudnn/cuda-11.7_8.6 gcc/6.3.0 - - -# For debugging the code python3 official/projects/maskformer/train.py \ --experiment=maskformer_coco_panoptic \ - --mode=train_and_eval \ - --model_dir=/depot/qqiu/data/vishal/projects/tf_maskformer_debug/models/official/projects/maskformer/ckpts \ \ No newline at end of file + --mode=train \ + --model_dir= \ +``` \ No newline at end of file diff --git a/models/official/projects/maskformer/requirements.txt b/models/official/projects/maskformer/requirements.txt index 91cd6172..addfe13e 100644 --- a/models/official/projects/maskformer/requirements.txt +++ b/models/official/projects/maskformer/requirements.txt @@ -1,4 +1,4 @@ -tensorflow +tensorflow==2.11.0 pyyaml gin-config==0.1.1 tensorflow_addons diff --git a/models/official/projects/maskformer/tasks/.ipynb_checkpoints/panoptic_maskformer-checkpoint.py b/models/official/projects/maskformer/tasks/.ipynb_checkpoints/panoptic_maskformer-checkpoint.py new file mode 100644 index 00000000..4fb99775 --- /dev/null +++ 
b/models/official/projects/maskformer/tasks/.ipynb_checkpoints/panoptic_maskformer-checkpoint.py @@ -0,0 +1,214 @@ +import tensorflow as tf + +from official.core import base_task +from official.core import task_factory +from typing import Any, Dict, List, Mapping, Optional, Tuple + +from official.projects.maskformer.dataloaders import input_reader +from official.vision.dataloaders import input_reader_factory +from official.common import dataset_fn + +from official.projects.maskformer.configs import maskformer as exp_cfg +from official.projects.maskformer.modeling.maskformer import MaskFormer +from official.projects.maskformer.losses.maskformer_losses import Loss +from official.projects.maskformer.dataloaders import panoptic_input + +from official.projects.detr.ops.matchers import hungarian_matching +from official.projects.maskformer.losses.maskformer_losses import Loss + +import numpy as np +from loguru import logger + +@task_factory.register_task_cls(exp_cfg.MaskFormerTask) +class PanopticTask(base_task.Task): + + def build_model(self)-> tf.keras.Model: + """Builds MaskFormer Model.""" + # TODO : Remove hardcoded values, Verify the number of classes + input_specs = tf.keras.layers.InputSpec(shape=[None] + + [640, 640, 3]) + + model = MaskFormer(input_specs= input_specs, hidden_size=256, + backbone_endpoint_name="5", + num_encoder_layers=0, + num_decoder_layers=6, + num_classes=133, + batch_size=8) + + return model + + def build_inputs(self, params, input_context: Optional[tf.distribute.InputContext] = None) -> tf.data.Dataset: + """ + Build panoptic segmentation dataset. + + """ + + # tf.profiler.experimental.server.start(6000) + if params.decoder.type == 'simple_decoder': + decoder = panoptic_input.TfExampleDecoder(regenerate_source_id = params.regenerate_source_id) + else: + raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type)) + + parser = panoptic_input.mask_former_parser(params.parser, is_training = params.is_training, decoder_fn=decoder.decode) + reader = input_reader.InputFn(params,dataset_fn = dataset_fn.pick_dataset_fn(params.file_type),parser_fn = parser) + dataset = reader(ctx=input_context) + # for sample in dataset.take(1): + # print(f"unique ids : {sample[1]['unique_ids']}") + # print("individual masks :", sample[1]["individual_masks"].shape) + # print(f"image shape : {sample[0].shape}") + # logger.debug(f"category_mask : {sample[1]['category_mask'].shape}") + # logger.debug(f"mask_labels :{sample[1]['mask_labels']}") + # logger.debug(f"instance_mask:{sample[1]['instance_mask'].shape}") + # print(sample[1]["instance_centers_heatmap"].shape) + # print(sample[1]["instance_centers_offset"].shape) + # print(sample[1]["semantic_weights"].shape) + # print(sample[1]["valid_mask"].shape) + # print(sample[1]["things_mask"].shape) + + # exit() + + return dataset + + def initialize(self, model: tf.keras.Model) -> None: + """ + Used to initialize the models with checkpoint + """ + #TODO : R50 checkpoint + pass + + def build_losses(self, output, labels, aux_outputs=None): + # TODO : Auxilary outputs + outputs = {"pred_logits": output["class_prob_predictions"], "pred_masks": output["mask_prob_predictions"]} + targets = labels + # print("pred_logits : ", outputs["pred_logits"].shape) + # print("mask_prob_predictions : ", outputs["pred_masks"].shape) + + matcher = hungarian_matching + no_object_weight = 0.1 + # TODO : Remove hardcoded values, number of classes + loss = Loss( + num_classes = 133, + matcher = matcher, + eos_coef = no_object_weight, + cost_class= 1.0, + 
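+            # The weights used here (class 1.0, dice 1.0, focal 20.0, eos_coef 0.1) mirror the
+            # MaskFormer reference configuration; eos_coef down-weights the "no object" class.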
cost_dice= 1.0, + cost_focal=20.0 + ) + + calculated_losses = loss(outputs, targets) + + # Losses are returned as weighted sum of individual losses + total_loss = calculated_losses['loss_ce'] + calculated_losses['loss_dice'] + calculated_losses['loss_focal'] + + weighted_ce = calculated_losses['loss_ce'] + weighted_focal = calculated_losses['loss_dice'] + weighted_dice = calculated_losses['loss_focal'] + + # Not implemented auxilary outputs + # if aux_outputs is not None: + # total_aux_loss = 0.0 + # # TODO : Remove hardcoding + # for i in range(4): #4 number of auxilary outputs + # total_aux_loss += calculated_losses['loss_ce_'+str(i)] + calculated_losses['loss_dice_'+str(i)] + calculated_losses['loss_focal_'+str(i)] + # total_loss = total_loss + total_aux_loss + + + return total_loss, weighted_ce, weighted_focal, weighted_dice + + def build_metrics(self, training=True): + """Builds panoptic metrics.""" + metrics = [] + metric_names = ['cls_loss', 'focal_loss', 'dice_loss'] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + # TODO : Need panoptic quality metric for evaluation + + return metrics + + + + def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None) -> Dict[str, Any]: + """ + Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ + features, labels = inputs + with tf.GradientTape() as tape: + outputs = model(features, training=True) + + loss = 0.0 + cls_loss = 0.0 + focal_loss = 0.0 + dice_loss = 0.0 + + ########################################################## + # TODO : Need to use this for TPU training when we use mirrored startegy + + # print(outputs.shape) + # exit() + # for output in outputs: + # # Computes per-replica loss. + + # total_loss, cls_loss_, focal_loss_, dice_loss_ = self.build_losses( + # output=output, labels=labels) + # loss += total_loss + # cls_loss += cls_loss_ + # focal_loss += focal_loss_ + # dice_loss += dice_loss_ + + # scaled_loss = loss + # # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # # scaled for numerical stability. + if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): + scaled_loss = optimizer.get_scaled_loss(scaled_loss) + ########################################################################## + + # TODO : Add auxiallary losses + total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels) + + tvars = model.trainable_variables + + grads = tape.gradient(total_loss, tvars) + + #################################################################### + # Do not use mixed precision for now + # # Scales back gradient when LossScaleOptimizer is used. + + optimizer.apply_gradients(list(zip(grads, tvars))) + + # # Multiply for logging. + # # Since we expect the gradient replica sum to happen in the optimizer, + # # the loss is scaled with global num_boxes and weights. + # # To have it more interpretable/comparable we scale it back when logging. 
+ num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync + total_loss *= num_replicas_in_sync + cls_loss *= num_replicas_in_sync + focal_loss *= num_replicas_in_sync + dice_loss *= num_replicas_in_sync + ##################################################################### + # # Trainer class handles loss metric for you. + logs = {self.loss: total_loss} + + all_losses = { + 'cls_loss': cls_loss, + 'focal_loss': focal_loss, + 'dice_loss': dice_loss, + } + + + # # Metric results will be added to logs for you. + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + return logs + + def validation_step(self, inputs, model, optimizer, metrics=None): + pass diff --git a/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc b/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc index 08fccbe2..449dc4ae 100644 Binary files a/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc and b/models/official/projects/maskformer/tasks/__pycache__/panoptic_maskformer.cpython-39.pyc differ diff --git a/models/official/projects/maskformer/tasks/panoptic_maskformer.py b/models/official/projects/maskformer/tasks/panoptic_maskformer.py index c5f5523e..411082b1 100644 --- a/models/official/projects/maskformer/tasks/panoptic_maskformer.py +++ b/models/official/projects/maskformer/tasks/panoptic_maskformer.py @@ -1,97 +1,258 @@ import tensorflow as tf from official.core import base_task +from official.core import task_factory from typing import Any, Dict, List, Mapping, Optional, Tuple + +from official.projects.maskformer.dataloaders import input_reader +from official.vision.dataloaders import input_reader_factory +from official.common import dataset_fn + +from official.projects.maskformer.configs import maskformer as exp_cfg from official.projects.maskformer.modeling.maskformer import MaskFormer from official.projects.maskformer.losses.maskformer_losses import Loss +from official.projects.maskformer.losses.inference import PanopticInference from official.projects.maskformer.dataloaders import panoptic_input -from official.vision.dataloaders import input_reader -from official.vision.dataloaders import input_reader_factory -from official.common import dataset_fn +from official.projects.detr.ops.matchers import hungarian_matching + +import numpy as np +from loguru import logger + +@task_factory.register_task_cls(exp_cfg.MaskFormerTask) class PanopticTask(base_task.Task): def build_model(self)-> tf.keras.Model: """Builds MaskFormer Model.""" - # TODO(ibrahim): Connect to params in config. 
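Editor's note: the block above multiplies the per-replica mean losses by num_replicas_in_sync purely for logging, so the reported values stay comparable across different replica counts. A small sketch of that step, assuming the losses are scalar tensors computed inside the distributed step; scale_for_logging is a hypothetical helper.

    import tensorflow as tf

    def scale_for_logging(losses):
      """Rescales per-replica loss scalars for logging under tf.distribute."""
      num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
      return {name: value * num_replicas for name, value in losses.items()}

    # Usage inside train_step, after build_losses:
    # logs = scale_for_logging({'total_loss': total_loss, 'cls_loss': cls_loss,
    #                           'focal_loss': focal_loss, 'dice_loss': dice_loss})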
- model = MaskFormer() + # TODO : Remove hardcoded values, Verify the number of classes + input_specs = tf.keras.layers.InputSpec(shape=[None] + + self._task_config.model.input_size) + + model = MaskFormer(input_specs= input_specs, + num_queries=self._task_config.model.num_queries, + hidden_size=self._task_config.model.hidden_size, + backbone_endpoint_name=self._task_config.model.backbone_endpoint_name, + fpn_encoder_layers=self._task_config.model.fpn_encoder_layers, + detr_encoder_laters=self._task_config.model.detr_encoder_layers, + num_decoder_layers=self._task_config.model.num_decoder_layers, + num_classes=self._task_config.model.num_classes, + ) return model + + def initialize(self, model: tf.keras.Model) -> None: + """ + Used to initialize the models with checkpoint + """ + """Loading pretrained checkpoint.""" + if not self._task_config.init_checkpoint: + return + + ckpt_dir_or_file = self._task_config.init_checkpoint + + # Restoring checkpoint. + if tf.io.gfile.isdir(ckpt_dir_or_file): + ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file) + + if self._task_config.init_checkpoint_modules == 'all': + ckpt = tf.train.Checkpoint(**model.checkpoint_items) + status = ckpt.restore(ckpt_dir_or_file) + status.assert_consumed() + elif self._task_config.init_checkpoint_modules == 'backbone': + ckpt = tf.train.Checkpoint(backbone=model.backbone) + status = ckpt.restore(ckpt_dir_or_file) + status.expect_partial().assert_existing_objects_matched() + + logging.info('Finished loading pretrained checkpoint from %s', + ckpt_dir_or_file) def build_inputs(self, params, input_context: Optional[tf.distribute.InputContext] = None) -> tf.data.Dataset: """ Build panoptic segmentation dataset. - """ - pass + + # tf.profiler.experimental.server.start(6000) + if params.decoder.type == 'simple_decoder': + decoder = panoptic_input.TfExampleDecoder(regenerate_source_id = params.regenerate_source_id) + else: + raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type)) + + parser = panoptic_input.mask_former_parser(params.parser, is_training = params.is_training, decoder_fn=decoder.decode) + reader = input_reader.InputFn(params,dataset_fn = dataset_fn.pick_dataset_fn(params.file_type),parser_fn = parser) + dataset = reader(ctx=input_context) + + return dataset + + + + def build_losses(self, output, labels, aux_outputs=None): + # TODO : Auxilary outputs + outputs = {"pred_logits": output["class_prob_predictions"], "pred_masks": output["mask_prob_predictions"]} + targets = labels + # print("pred_logits : ", outputs["pred_logits"].shape) + # print("mask_prob_predictions : ", outputs["pred_masks"].shape) + + matcher = hungarian_matching + no_object_weight = self._task_config.losses.no_object_weight + # TODO : Remove hardcoded values, number of classes + loss = Loss( + num_classes = self._task_config.model.num_classes, + matcher = matcher, + eos_coef = no_object_weight, + cost_class= self._task_config.losses.cost_class, + cost_dice= self._task_config.losses.cost_dice, + cost_focal= self._task_config.losses.cost_focal + ) - def build_losses(self, class_prob_outputs, mask_prob_outputs, class_targets, mask_targets): - outputs = {"pred_logits": class_prob_outputs, "pred_masks": mask_prob_outputs} - targets = {"labels": class_targets, "masks": mask_targets} + calculated_losses = loss(outputs, targets) - # _compute_loss = Loss(init loss here...) 
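Editor's note: the new initialize() above restores either the whole model or only the backbone from task_config.init_checkpoint; it calls logging.info, which presumes `from absl import logging` is imported at the top of the file. A standalone sketch of the backbone-only path is below, assuming the model exposes a `.backbone` attribute as in the code above.

    import tensorflow as tf
    from absl import logging

    def restore_backbone(model, ckpt_dir_or_file):
      """Restores only the backbone weights, mirroring the 'backbone' branch above."""
      if tf.io.gfile.isdir(ckpt_dir_or_file):
        ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
      ckpt = tf.train.Checkpoint(backbone=model.backbone)
      status = ckpt.restore(ckpt_dir_or_file)
      # expect_partial() silences warnings about head variables that are not
      # present in a backbone-only (e.g. classification) checkpoint.
      status.expect_partial().assert_existing_objects_matched()
      logging.info('Restored backbone from %s', ckpt_dir_or_file)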
- # return _compute_loss(outputs, targets) - raise NotImplementedError + # Losses are returned as weighted sum of individual losses + total_loss = calculated_losses['loss_ce'] + calculated_losses['loss_dice'] + calculated_losses['loss_focal'] + + weighted_ce = calculated_losses['loss_ce'] + weighted_focal = calculated_losses['loss_dice'] + weighted_dice = calculated_losses['loss_focal'] + + # Not implemented auxilary outputs + # if aux_outputs is not None: + # total_aux_loss = 0.0 + # # TODO : Remove hardcoding + # for i in range(4): #4 number of auxilary outputs + # total_aux_loss += calculated_losses['loss_ce_'+str(i)] + calculated_losses['loss_dice_'+str(i)] + calculated_losses['loss_focal_'+str(i)] + # total_loss = total_loss + total_aux_loss + + + return total_loss, weighted_ce, weighted_focal, weighted_dice def build_metrics(self, training=True): - raise NotImplementedError - + """Builds panoptic metrics.""" + metrics = [] + metric_names = ['cls_loss', 'focal_loss', 'dice_loss'] + for name in metric_names: + metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32)) + # TODO : Need panoptic quality metric for evaluation + + return metrics + + + def train_step(self, inputs: Tuple[Any, Any],model: tf.keras.Model, optimizer: tf.keras.optimizers.Optimizer, metrics: Optional[List[Any]] = None) -> Dict[str, Any]: + """ + Does forward and backward. + + Args: + inputs: a dictionary of input tensors. + model: the model, forward pass definition. + optimizer: the optimizer for this training step. + metrics: a nested structure of metrics objects. + + Returns: + A dictionary of logs. + """ features, labels = inputs with tf.GradientTape() as tape: outputs = model(features, training=True) - - #TODO Change to maskformer loss + loss = 0.0 cls_loss = 0.0 - box_loss = 0.0 - giou_loss = 0.0 - - for output in outputs: - # Computes per-replica loss. - layer_loss, layer_cls_loss, layer_box_loss, layer_giou_loss = self.build_losses( - outputs=output, labels=labels, aux_losses=model.losses) - loss += layer_loss - cls_loss += layer_cls_loss - box_loss += layer_box_loss - giou_loss += layer_giou_loss + focal_loss = 0.0 + dice_loss = 0.0 + + ########################################################## + # TODO : Need to use this for TPU training when we use mirrored startegy + + # print(outputs.shape) + # exit() + # for output in outputs: + # # Computes per-replica loss. + + # total_loss, cls_loss_, focal_loss_, dice_loss_ = self.build_losses( + # output=output, labels=labels) + # loss += total_loss + # cls_loss += cls_loss_ + # focal_loss += focal_loss_ + # dice_loss += dice_loss_ - scaled_loss = loss - # For mixed_precision policy, when LossScaleOptimizer is used, loss is - # scaled for numerical stability. + # scaled_loss = loss + # # For mixed_precision policy, when LossScaleOptimizer is used, loss is + # # scaled for numerical stability. if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): scaled_loss = optimizer.get_scaled_loss(scaled_loss) - - tvars = model.trainable_variables - grads = tape.gradient(scaled_loss, tvars) - # Scales back gradient when LossScaleOptimizer is used. - if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer): - grads = optimizer.get_unscaled_gradients(grads) - optimizer.apply_gradients(list(zip(grads, tvars))) - - # Multiply for logging. - # Since we expect the gradient replica sum to happen in the optimizer, - # the loss is scaled with global num_boxes and weights. - # To have it more interpretable/comparable we scale it back when logging. 
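Editor's note: the removed DETR-style block shows the usual mixed-precision recipe (scale the loss inside the tape, unscale the gradients before applying), while the retained `optimizer.get_scaled_loss(scaled_loss)` call above references a `scaled_loss` that is never assigned once the surrounding loop is commented out. A self-contained sketch of the complete pattern, assuming loss scaling is re-enabled; `loss_fn` is a hypothetical stand-in for self.build_losses.

    import tensorflow as tf

    def train_step_with_loss_scaling(model, optimizer, features, labels, loss_fn):
      """Mixed-precision update: scale the loss inside the tape, unscale the grads."""
      is_scaled = isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer)
      with tf.GradientTape() as tape:
        outputs = model(features, training=True)
        loss = loss_fn(outputs, labels)
        scaled_loss = optimizer.get_scaled_loss(loss) if is_scaled else loss
      tvars = model.trainable_variables
      grads = tape.gradient(scaled_loss, tvars)
      if is_scaled:
        # Undo the loss scale so the optimizer sees true gradients.
        grads = optimizer.get_unscaled_gradients(grads)
      optimizer.apply_gradients(zip(grads, tvars))
      return loss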
+ ########################################################################## + + # TODO : Add auxiallary losses + total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels) + + tvars = model.trainable_variables + + grads = tape.gradient(total_loss, tvars) + + #################################################################### + # Do not use mixed precision for now + # # Scales back gradient when LossScaleOptimizer is used. + + optimizer.apply_gradients(list(zip(grads, tvars))) + + # # Multiply for logging. + # # Since we expect the gradient replica sum to happen in the optimizer, + # # the loss is scaled with global num_boxes and weights. + # # To have it more interpretable/comparable we scale it back when logging. + num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync + total_loss *= num_replicas_in_sync + cls_loss *= num_replicas_in_sync + focal_loss *= num_replicas_in_sync + dice_loss *= num_replicas_in_sync + ##################################################################### + # # Trainer class handles loss metric for you. + logs = {self.loss: total_loss} + + all_losses = { + 'cls_loss': cls_loss, + 'focal_loss': focal_loss, + 'dice_loss': dice_loss, + } + + + # # Metric results will be added to logs for you. + if metrics: + for m in metrics: + m.update_state(all_losses[m.name]) + return logs + + def validation_step(self, inputs, model, optimizer, metrics=None): + features, labels = inputs + outputs = model(features, training=False) + + loss = 0.0 + cls_loss = 0.0 + focal_loss = 0.0 + dice_loss = 0.0 + + total_loss, cls_loss, focal_loss, dice_loss = self.build_losses(output=outputs, labels=labels) + num_replicas_in_sync = tf.distribute.get_strategy().num_replicas_in_sync - loss *= num_replicas_in_sync + total_loss *= num_replicas_in_sync cls_loss *= num_replicas_in_sync - box_loss *= num_replicas_in_sync - giou_loss *= num_replicas_in_sync - - # Trainer class handles loss metric for you. - logs = {self.loss: loss} + focal_loss *= num_replicas_in_sync + dice_loss *= num_replicas_in_sync + + ##################################################################### + # # Trainer class handles loss metric for you. + logs = {self.loss: total_loss} + + outputs = {"pred_logits": output["class_prob_predictions"], "pred_masks": output["mask_prob_predictions"]} + panoptic_seg, segments_info = PanopticInference(output["pred_logits"], output["pred_masks"], features.shape, self._task_config.model.num_classes) + + logs.update({'panoptic_seg': panoptic_seg, 'segments_info': segments_info}) all_losses = { - 'cls_loss': cls_loss, - 'box_loss': box_loss, - 'giou_loss': giou_loss, - } - # Metric results will be added to logs for you. + 'cls_loss': cls_loss, + 'focal_loss': focal_loss, + 'dice_loss': dice_loss, + } + + # # Metric results will be added to logs for you. 
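Editor's note: the validation_step above reuses the training losses for logging and then post-processes the raw predictions into a panoptic segmentation. A condensed sketch of that flow is below; `postprocess_fn` is a hypothetical stand-in for PanopticInference (whose exact signature is not shown in this patch), and note that the hunk above builds the prediction dict from `output` while the model call assigns to `outputs`.

    import tensorflow as tf

    def validation_logs(model, features, labels, loss_key,
                        build_losses, postprocess_fn, num_classes):
      """Eval flow: forward pass, losses for logging, panoptic post-processing."""
      outputs = model(features, training=False)
      total_loss, cls_loss, focal_loss, dice_loss = build_losses(
          output=outputs, labels=labels)
      num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
      logs = {loss_key: total_loss * num_replicas,
              'cls_loss': cls_loss * num_replicas,
              'focal_loss': focal_loss * num_replicas,
              'dice_loss': dice_loss * num_replicas}
      # Hypothetical call shape, mirroring how PanopticInference is used above.
      panoptic_seg, segments_info = postprocess_fn(
          outputs['class_prob_predictions'], outputs['mask_prob_predictions'],
          features.shape, num_classes)
      logs.update({'panoptic_seg': panoptic_seg, 'segments_info': segments_info})
      return logs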
if metrics: - for m in metrics: - m.update_state(all_losses[m.name]) + for m in metrics: + m.update_state(all_losses[m.name]) return logs - - def validation_step(self, inputs, model, optimizer, metrics=None): - raise NotImplementedError diff --git a/models/official/projects/maskformer/testing.ipynb b/models/official/projects/maskformer/testing.ipynb new file mode 100644 index 00000000..956cbd9b --- /dev/null +++ b/models/official/projects/maskformer/testing.ipynb @@ -0,0 +1,32 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/models/official/projects/maskformer/tpu_test.py b/models/official/projects/maskformer/tpu_test.py new file mode 100644 index 00000000..687d857e --- /dev/null +++ b/models/official/projects/maskformer/tpu_test.py @@ -0,0 +1,8 @@ +import tensorflow as tf + +if __name__ == "__main__": + cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver( + tpu="tf-train-1", project="red-atlas-305317", zone="us-central1-a") + tf.config.experimental_connect_to_cluster(cluster_resolver) + tf.tpu.experimental.initialize_tpu_system(cluster_resolver) + strategy = tf.distribute.TPUStrategy(cluster_resolver) \ No newline at end of file diff --git a/models/official/projects/maskformer/train.py b/models/official/projects/maskformer/train.py index df0f4779..c1f04537 100644 --- a/models/official/projects/maskformer/train.py +++ b/models/official/projects/maskformer/train.py @@ -12,8 +12,16 @@ from official.modeling import performance from official.projects.maskformer.configs import maskformer from official.projects.maskformer.tasks import panoptic_maskformer +import tensorflow as tf +from cloud_tpu_client import Client +import os +FLAGS = flags.FLAGS def main(_): + # This works only for TPU v3 version + c = Client(os.environ['TPU_NAME'], zone=os.environ['TPU_ZONE'], project=os.environ['TPU_PROJECT']) + c.configure_tpu_version(os.environ["TPU_SOFTWARE"], restart_type='ifNeeded') + c.wait_for_healthy() gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_params) params = train_utils.parse_configuration(FLAGS) model_dir = FLAGS.model_dir @@ -26,16 +34,31 @@ def main(_): # can have significant impact on model speeds by utilizing float16 in case of # GPUs, and bfloat16 in the case of TPUs. 
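Editor's note: tpu_test.py above hard-codes the TPU name, project and zone, while train.py and train_r50.sh pass them through TPU_NAME / TPU_ZONE / TPU_PROJECT environment variables. A small sketch that builds the same TPUStrategy from those variables (a convenience wrapper, not part of the patch):

    import os
    import tensorflow as tf

    def connect_to_tpu():
      """Builds a TPUStrategy from the env vars exported in train_r50.sh."""
      resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
          tpu=os.environ['TPU_NAME'],
          zone=os.environ.get('TPU_ZONE'),
          project=os.environ.get('TPU_PROJECT'))
      tf.config.experimental_connect_to_cluster(resolver)
      tf.tpu.experimental.initialize_tpu_system(resolver)
      return tf.distribute.TPUStrategy(resolver)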
loss_scale takes effect only when # dtype is float16 + + # resolver = tf.distribute.cluster_resolver.TPUClusterResolver() + + # Uncomment to test on TPU if params.runtime.mixed_precision_dtype: performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) distribution_strategy = distribute_utils.get_distribution_strategy( - distribution_strategy=params.runtime.distribution_strategy, + distribution_strategy="tpu", all_reduce_alg=params.runtime.all_reduce_alg, num_gpus=params.runtime.num_gpus, tpu_address=params.runtime.tpu) + + # Comment if running on TPU + # if params.runtime.mixed_precision_dtype: + # performance.set_mixed_precision_policy(params.runtime.mixed_precision_dtype) + # distribution_strategy = distribute_utils.get_distribution_strategy( + # distribution_strategy=params.runtime.distribution_strategy, + # all_reduce_alg=params.runtime.all_reduce_alg, + # num_gpus=1) + + # Below code is independent of compute platform with distribution_strategy.scope(): task = task_factory.get_task(params.task, logging_dir=model_dir) - + + train_lib.run_experiment( distribution_strategy=distribution_strategy, task=task, diff --git a/models/official/projects/maskformer/train_r50.sh b/models/official/projects/maskformer/train_r50.sh index 9c57ca22..5315f7a7 100644 --- a/models/official/projects/maskformer/train_r50.sh +++ b/models/official/projects/maskformer/train_r50.sh @@ -1,17 +1,24 @@ -$ export MODEL_DIR="gs://" -$ export TPU_NAME="" -$ export ANNOTATION_FILE="gs://" -$ export TRAIN_DATA="gs://" -$ export EVAL_DATA="gs://" -$ export OVERRIDES="task.validation_data.input_path=${EVAL_DATA},\ -task.train_data.input_path=${TRAIN_DATA},\ -task.annotation_file=${ANNOTATION_FILE},\ -runtime.distribution_strategy=tpu" - - -$ python3 train.py \ - --experiment panoptic_r50_coco \ - --config_file configs/experiments/panoptic_coco_r50.yaml \ +#!/bin/bash +fusermount -u ~/datasets +fusermount -u ~/models +gcsfuse --implicit-dirs cam2-datasets ~/datasets +gcsfuse cam2-models ~/models +export PYTHONPATH=$PYTHONPATH:~/tf-maskformer/models +export MODEL_DIR="gs://cam2-models/maskformer" +export DATA_PTH="gs://cam2-datasets/coco_panoptic" +export TPU_NAME="tf-debug-eu-1" +export TPU_SOFTWARE="2.11.0" +export TPU_PROJECT="red-atlas-305317" +export TPU_ZONE="europe-west4-a" +# export OVERRIDES="task.validation_data.input_path=${DATA_PTH},\ +# task.train_data.input_path=${DATA_PTH},\ +# runtime.distribution_strategy=tpu" +export OVERRIDES="runtime.distribution_strategy=tpu,\ +runtime.mixed_precision_dtype=bfloat16,\ +trainer.train_steps=554400,\ +trainer.optimizer_config.learning_rate.stepwise.boundaries=[369600]" +python3 models/official/projects/maskformer/train.py \ + --experiment maskformer_coco_panoptic \ --mode train \ --model_dir $MODEL_DIR \ --tpu $TPU_NAME \ diff --git a/models/official/vision/__pycache__/__init__.cpython-38.pyc b/models/official/vision/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..1246d61a Binary files /dev/null and b/models/official/vision/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/vision/__pycache__/__init__.cpython-39.pyc b/models/official/vision/__pycache__/__init__.cpython-39.pyc index 2eceb02d..57c5135a 100644 Binary files a/models/official/vision/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/__init__.cpython-38.pyc b/models/official/vision/configs/__pycache__/__init__.cpython-38.pyc new file mode 
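Editor's note: the train.py hunk above hard-codes distribution_strategy="tpu" and keeps the GPU variant commented out. A sketch that keeps the value configurable, reusing the same distribute_utils.get_distribution_strategy call shown in the diff; the STRATEGY_OVERRIDE environment variable and the import path are assumptions for illustration.

    import os
    from official.common import distribute_utils

    def build_strategy(params):
      """Chooses the distribution strategy from the experiment config,
      with an optional env-var override instead of hard-coding 'tpu'."""
      strategy_name = os.environ.get('STRATEGY_OVERRIDE',
                                     params.runtime.distribution_strategy)
      return distribute_utils.get_distribution_strategy(
          distribution_strategy=strategy_name,
          all_reduce_alg=params.runtime.all_reduce_alg,
          num_gpus=params.runtime.num_gpus,
          tpu_address=params.runtime.tpu)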
100644 index 00000000..4da6602c Binary files /dev/null and b/models/official/vision/configs/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc b/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc index 60fff740..ad1a4c84 100644 Binary files a/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/configs/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones.cpython-38.pyc b/models/official/vision/configs/__pycache__/backbones.cpython-38.pyc new file mode 100644 index 00000000..55e2ac9d Binary files /dev/null and b/models/official/vision/configs/__pycache__/backbones.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc b/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc index 290e4b3d..09c16202 100644 Binary files a/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc and b/models/official/vision/configs/__pycache__/backbones.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones_3d.cpython-38.pyc b/models/official/vision/configs/__pycache__/backbones_3d.cpython-38.pyc new file mode 100644 index 00000000..99b4a6dd Binary files /dev/null and b/models/official/vision/configs/__pycache__/backbones_3d.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc b/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc index 16738c50..0c39e9d9 100644 Binary files a/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc and b/models/official/vision/configs/__pycache__/backbones_3d.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/common.cpython-38.pyc b/models/official/vision/configs/__pycache__/common.cpython-38.pyc new file mode 100644 index 00000000..95ceb60b Binary files /dev/null and b/models/official/vision/configs/__pycache__/common.cpython-38.pyc differ diff --git a/models/official/vision/configs/__pycache__/common.cpython-39.pyc b/models/official/vision/configs/__pycache__/common.cpython-39.pyc index 52a219f1..ecaeadae 100644 Binary files a/models/official/vision/configs/__pycache__/common.cpython-39.pyc and b/models/official/vision/configs/__pycache__/common.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc b/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc index e3ff8906..2c0cffbc 100644 Binary files a/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc and b/models/official/vision/configs/__pycache__/decoders.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc b/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc index 411e29ef..76bb702e 100644 Binary files a/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc and b/models/official/vision/configs/__pycache__/image_classification.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc b/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc index 625aedec..378f2ce1 100644 Binary files a/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc and b/models/official/vision/configs/__pycache__/maskrcnn.cpython-39.pyc differ diff --git 
a/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc b/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc index cc6083e9..867ba786 100644 Binary files a/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc and b/models/official/vision/configs/__pycache__/retinanet.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc b/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc index 339bf33d..a38479f2 100644 Binary files a/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc and b/models/official/vision/configs/__pycache__/semantic_segmentation.cpython-39.pyc differ diff --git a/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc b/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc index a38e0f5c..e1fbe521 100644 Binary files a/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc and b/models/official/vision/configs/__pycache__/video_classification.cpython-39.pyc differ diff --git a/models/official/vision/data/__pycache__/__init__.cpython-39.pyc b/models/official/vision/data/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 00000000..60ae193f Binary files /dev/null and b/models/official/vision/data/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/data/__pycache__/tfrecord_lib.cpython-39.pyc b/models/official/vision/data/__pycache__/tfrecord_lib.cpython-39.pyc new file mode 100644 index 00000000..5a62d13d Binary files /dev/null and b/models/official/vision/data/__pycache__/tfrecord_lib.cpython-39.pyc differ diff --git a/models/official/vision/data/create_coco_tf_record.py b/models/official/vision/data/create_coco_tf_record.py index cfcd679a..334db760 100644 --- a/models/official/vision/data/create_coco_tf_record.py +++ b/models/official/vision/data/create_coco_tf_record.py @@ -138,12 +138,16 @@ def generate_coco_panoptics_masks(segments_info, mask_path, segment_id = segment['id'] category_id = segment['category_id'] is_crowd = segment['iscrowd'] + if FLAGS.panoptic_skip_crowd and is_crowd: continue + if is_category_thing[category_id]: + # This for thing encoded_category_id = _THING_CLASS_ID instance_id = idx + 1 else: + # This is for stuff (for stuff no instance id) encoded_category_id = category_id - _STUFF_CLASSES_OFFSET instance_id = _VOID_INSTANCE_ID @@ -151,7 +155,7 @@ def generate_coco_panoptics_masks(segments_info, mask_path, semantic_segmentation_mask[segment_mask] = encoded_category_id if include_panoptic_masks: - category_mask[segment_mask] = category_id + category_mask[segment_mask] = category_id instance_mask[segment_mask] = instance_id outputs = { diff --git a/models/official/vision/data/tfrecord_lib.py b/models/official/vision/data/tfrecord_lib.py index 4eeef2d8..f4090c04 100644 --- a/models/official/vision/data/tfrecord_lib.py +++ b/models/official/vision/data/tfrecord_lib.py @@ -24,7 +24,7 @@ import tensorflow as tf import multiprocessing as mp - +from tqdm import tqdm LOG_EVERY = 100 @@ -162,8 +162,8 @@ def write_tf_record_dataset(output_path, annotation_iterator, else: tf_example_iterator = map(process_func, annotation_iterator) - for idx, (tf_example, num_annotations_skipped) in enumerate( - tf_example_iterator): + for idx, (tf_example, num_annotations_skipped) in tqdm(enumerate( + tf_example_iterator)): if idx % LOG_EVERY == 0: logging.info('On image %d', idx) diff --git 
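Editor's note: the create_coco_tf_record.py hunk above annotates the thing/stuff branch that fills the semantic, category and instance masks. A condensed, standalone sketch of that loop follows; it assumes `panoptic_png` is already decoded to a 2-D segment-id map and `is_category_thing` maps category ids to booleans, and the constant defaults are illustrative (the real _THING_CLASS_ID, _STUFF_CLASSES_OFFSET and _VOID_INSTANCE_ID live in create_coco_tf_record.py).

    import numpy as np

    def encode_panoptic_masks(segments_info, panoptic_png, is_category_thing,
                              thing_class_id=1, stuff_offset=90,
                              void_instance_id=0):
      """Builds semantic / category / instance masks from COCO panoptic segments."""
      semantic_mask = np.zeros_like(panoptic_png, dtype=np.uint8)
      category_mask = np.zeros_like(panoptic_png, dtype=np.uint16)
      instance_mask = np.full_like(panoptic_png, void_instance_id, dtype=np.uint16)
      for idx, segment in enumerate(segments_info):
        segment_pixels = panoptic_png == segment['id']
        if is_category_thing[segment['category_id']]:
          # "Thing": collapse the class id, keep a per-segment instance id.
          semantic_mask[segment_pixels] = thing_class_id
          instance_mask[segment_pixels] = idx + 1
        else:
          # "Stuff": keep the (offset) class id; instance id stays void.
          semantic_mask[segment_pixels] = segment['category_id'] - stuff_offset
        category_mask[segment_pixels] = segment['category_id']
      return semantic_mask, category_mask, instance_mask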
a/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc index c3500ec6..9909857c 100644 Binary files a/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc index ca5fc162..226533cd 100644 Binary files a/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/classification_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc index 36cc59fb..dd42bb91 100644 Binary files a/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/decoder.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc index acf695f3..c7241086 100644 Binary files a/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/input_reader.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc index 8bff6a80..edd4a5bd 100644 Binary files a/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/input_reader_factory.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc index 89b4c945..96b81966 100644 Binary files a/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/maskrcnn_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc index 13b01a4c..13779528 100644 Binary files a/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/parser.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc index b071b213..8a64cc4d 100644 Binary files a/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/retinanet_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc index 665ac509..eb593f60 100644 Binary files a/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/segmentation_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc 
b/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc index 47fbf088..ad9fd8ae 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tf_example_decoder.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc index 3d6cad8d..303eb8f7 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tf_example_label_map_decoder.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc index a32724a8..84fc2187 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_classification_decoders.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc index 4d7dc106..e20c9b71 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_detection_decoders.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc index 91b56792..6b35c2ff 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_factory.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc index 9ae57ad8..eeb00ef2 100644 Binary files a/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/tfds_segmentation_decoders.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc index b8d29f45..922109c7 100644 Binary files a/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/utils.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc b/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc index 6b4f4645..f03e5448 100644 Binary files a/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc and b/models/official/vision/dataloaders/__pycache__/video_input.cpython-39.pyc differ diff --git a/models/official/vision/dataloaders/input_reader.py b/models/official/vision/dataloaders/input_reader.py index 38cae7f5..af13ba0d 100644 --- a/models/official/vision/dataloaders/input_reader.py +++ b/models/official/vision/dataloaders/input_reader.py @@ -190,7 +190,7 @@ def read( labeled_batch_size, pl_batch_size = calculate_batch_sizes( self._global_batch_size, self._pseudo_label_data_ratio, 
self._pseudo_label_batch_size) - + if not labeled_batch_size and pl_batch_size: raise ValueError( 'Invalid batch_size: {} and pseudo_label_data_ratio: {}, ' diff --git a/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc index 989be085..4772afe6 100644 Binary files a/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc index 924de2bf..cd52b08a 100644 Binary files a/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/coco_evaluator.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc index c55b7920..f6935164 100644 Binary files a/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/coco_utils.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc index 9df5ec50..3ee33b31 100644 Binary files a/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/instance_metrics.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc index db961088..8456c478 100644 Binary files a/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/iou.cpython-39.pyc differ diff --git a/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc b/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc index 6b52d4e0..d4e3e096 100644 Binary files a/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc and b/models/official/vision/evaluation/__pycache__/segmentation_metrics.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc b/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc index 53300321..53a80efd 100644 Binary files a/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/losses/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc b/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc index 4e83157b..e8d948f9 100644 Binary files a/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc and b/models/official/vision/losses/__pycache__/focal_loss.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc b/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc index 68041bfe..f87f15a1 100644 Binary files a/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc and b/models/official/vision/losses/__pycache__/loss_utils.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc 
b/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc index 439d779f..ca7c3836 100644 Binary files a/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc and b/models/official/vision/losses/__pycache__/maskrcnn_losses.cpython-39.pyc differ diff --git a/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc b/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc index 47fa0bea..ae38a797 100644 Binary files a/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc and b/models/official/vision/losses/__pycache__/segmentation_losses.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc index 9dde48e0..b71eceeb 100644 Binary files a/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc index f4168ed0..fa2ee357 100644 Binary files a/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/classification_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc b/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc index 6062f17a..58dd5240 100644 Binary files a/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/factory.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc b/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc index 650f2788..22537f12 100644 Binary files a/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/factory_3d.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc index 6701eb93..845c463a 100644 Binary files a/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/maskrcnn_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc index 1b2ed42b..606fbce1 100644 Binary files a/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/retinanet_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc index f284ca5a..f30dd6d2 100644 Binary files a/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/segmentation_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc b/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc index aa590bea..71de84c7 100644 Binary files 
a/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc and b/models/official/vision/modeling/__pycache__/video_classification_model.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc index b360c804..b79ca790 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc index 4c1e7374..5de4f0d1 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/efficientnet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc index 50017dfb..4f711e0e 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/factory.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc index 988d1413..d714143d 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/mobiledet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc index 1351c469..3832049c 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/mobilenet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc index 612d10fe..8aa81a07 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/resnet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc index 38a7fdd2..c8695b19 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/resnet_3d.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc index 6c00f0d0..152c8a91 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/resnet_deeplab.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc index 633e7450..fb7b3de5 100644 Binary files 
a/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/revnet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc index c69a61cd..0fbecda8 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/spinenet.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc index 285c9424..89c9ea4a 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/spinenet_mobile.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc index 6fe2e4cc..db368c9b 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/vit.cpython-39.pyc differ diff --git a/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc b/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc index 26524178..783f5f2f 100644 Binary files a/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc and b/models/official/vision/modeling/backbones/__pycache__/vit_specs.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc index 74fc63a2..1e5dcb36 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc index b496cf76..85cd96a8 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/aspp.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc index 59969ae8..5751a8e6 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/factory.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc index e8521fc4..3bf9cd83 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc and b/models/official/vision/modeling/decoders/__pycache__/fpn.cpython-39.pyc differ diff --git a/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc b/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc index 47fdbf1a..1157544d 100644 Binary files a/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc and 
b/models/official/vision/modeling/decoders/__pycache__/nasfpn.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc index 1eb8bd44..397863d7 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc index ae51ea84..71e746fa 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/dense_prediction_heads.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc index d2b3e6b6..2280714a 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/instance_heads.cpython-39.pyc differ diff --git a/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc b/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc index 7dc95b30..511f2f6b 100644 Binary files a/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc and b/models/official/vision/modeling/heads/__pycache__/segmentation_heads.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc index c6463ad9..f3af43b2 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc index aea33ee2..ebb879c8 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/box_sampler.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc index 67ca134f..c229b763 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/deeplab.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc index 216c232b..12a2d87c 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/detection_generator.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc index c990fdf2..70361410 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc and 
b/models/official/vision/modeling/layers/__pycache__/edgetpu.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc index 9e558f77..3f0be642 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/mask_sampler.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc index b900218b..5ce293ef 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/nn_blocks.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc index b7fa4e0c..4839befa 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/nn_blocks_3d.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc index 3fcce21c..16273210 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/nn_layers.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc index c4f341b9..87e650cf 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/roi_aligner.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc index 84c2da2a..16171547 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/roi_generator.cpython-39.pyc differ diff --git a/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc b/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc index 92e41c57..6af07b87 100644 Binary files a/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc and b/models/official/vision/modeling/layers/__pycache__/roi_sampler.cpython-39.pyc differ diff --git a/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc b/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc index 09392f15..d23c24cb 100644 Binary files a/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/modeling/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc b/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc index 29429709..47e09880 100644 Binary files a/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/ops/__pycache__/__init__.cpython-39.pyc differ diff --git 
a/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc b/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc index 416c6774..43764d57 100644 Binary files a/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc and b/models/official/vision/ops/__pycache__/anchor.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc b/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc index eac01ad4..31b590e0 100644 Binary files a/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc and b/models/official/vision/ops/__pycache__/anchor_generator.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/augment.cpython-39.pyc b/models/official/vision/ops/__pycache__/augment.cpython-39.pyc index a89d45c7..96d671f4 100644 Binary files a/models/official/vision/ops/__pycache__/augment.cpython-39.pyc and b/models/official/vision/ops/__pycache__/augment.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc b/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc index e37f2ea4..49ae0429 100644 Binary files a/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc and b/models/official/vision/ops/__pycache__/box_matcher.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc index 9b22a986..8deca1b5 100644 Binary files a/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/box_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc b/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc index 5fb687e3..f69ba4b8 100644 Binary files a/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc and b/models/official/vision/ops/__pycache__/iou_similarity.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc index 773a39c0..4ca8d9ef 100644 Binary files a/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/mask_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/nms.cpython-39.pyc b/models/official/vision/ops/__pycache__/nms.cpython-39.pyc index c34eb8b3..1464269c 100644 Binary files a/models/official/vision/ops/__pycache__/nms.cpython-39.pyc and b/models/official/vision/ops/__pycache__/nms.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc index 1af06b27..6cfe4477 100644 Binary files a/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/preprocess_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc b/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc index 4ab2e180..e91acd80 100644 Binary files a/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc and b/models/official/vision/ops/__pycache__/preprocess_ops_3d.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc index 65727cfe..9d5960b2 100644 Binary files 
a/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/sampling_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc b/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc index b3484bea..4631b4f6 100644 Binary files a/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc and b/models/official/vision/ops/__pycache__/spatial_transform_ops.cpython-39.pyc differ diff --git a/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc b/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc index 14f08cce..341929e2 100644 Binary files a/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc and b/models/official/vision/ops/__pycache__/target_gather.cpython-39.pyc differ diff --git a/models/official/vision/ops/preprocess_ops.py b/models/official/vision/ops/preprocess_ops.py index 890e3f70..dba6d61b 100644 --- a/models/official/vision/ops/preprocess_ops.py +++ b/models/official/vision/ops/preprocess_ops.py @@ -36,104 +36,81 @@ def clip_or_pad_to_fixed_size(input_tensor, size, constant_values=0): - """Pads data to a fixed length at the first dimension. - - Args: - input_tensor: `Tensor` with any dimension. - size: `int` number for the first dimension of output Tensor. - constant_values: `int` value assigned to the paddings. - - Returns: - `Tensor` with the first dimension padded to `size`. - """ - input_shape = input_tensor.get_shape().as_list() - padding_shape = [] - - # Computes the padding length on the first dimension, clip input tensor if it - # is longer than `size`. - input_length = tf.shape(input_tensor)[0] - input_length = tf.clip_by_value(input_length, 0, size) - input_tensor = input_tensor[:input_length] - - padding_length = tf.maximum(0, size - input_length) - padding_shape.append(padding_length) - - # Copies shapes of the rest of input shape dimensions. - for i in range(1, len(input_shape)): - padding_shape.append(tf.shape(input_tensor)[i]) - - # Pads input tensor to the fixed first dimension. - paddings = tf.cast(constant_values * tf.ones(padding_shape), - input_tensor.dtype) - padded_tensor = tf.concat([input_tensor, paddings], axis=0) - output_shape = input_shape - output_shape[0] = size - padded_tensor.set_shape(output_shape) - return padded_tensor - - -def normalize_image(image: tf.Tensor, - offset: Sequence[float] = MEAN_NORM, - scale: Sequence[float] = STDDEV_NORM): - """Normalizes the image to zero mean and unit variance.""" - with tf.name_scope('normalize_image'): - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - return normalize_scaled_float_image(image, offset, scale) - - -def normalize_scaled_float_image(image: tf.Tensor, - offset: Sequence[float] = MEAN_NORM, - scale: Sequence[float] = STDDEV_NORM): - """Normalizes a scaled float image to zero mean and unit variance. - - It assumes the input image is float dtype with values in [0, 1). - - Args: - image: A tf.Tensor in float32 dtype with values in range [0, 1). - offset: A tuple of mean values to be subtracted from the image. - scale: A tuple of normalization factors. - - Returns: - A normalized image tensor. 
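Note: `clip_or_pad_to_fixed_size` clips the leading dimension to at most `size` and then pads it back up to exactly `size` with `constant_values`. A minimal sketch of the same contract, using a hypothetical helper name (TF 2.x assumed):

import tensorflow as tf

def fixed_first_dim_sketch(t, size, constant_values=0):
  # Clip the first dimension to at most `size`.
  t = t[:tf.minimum(tf.shape(t)[0], size)]
  # Pad the first dimension back up to exactly `size`.
  pad = tf.maximum(0, size - tf.shape(t)[0])
  paddings = [[0, pad]] + [[0, 0]] * (t.shape.rank - 1)
  return tf.pad(t, paddings, constant_values=constant_values)

boxes = tf.random.uniform([3, 4])
print(fixed_first_dim_sketch(boxes, 10).shape)  # (10, 4): padded with zeros
print(fixed_first_dim_sketch(boxes, 2).shape)   # (2, 4): clipped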
- """ - offset = tf.constant(offset) - offset = tf.expand_dims(offset, axis=0) - offset = tf.expand_dims(offset, axis=0) - image -= offset - - scale = tf.constant(scale) - scale = tf.expand_dims(scale, axis=0) - scale = tf.expand_dims(scale, axis=0) - image /= scale - return image + """Pads data to a fixed length at the first dimension. + Args: + input_tensor: `Tensor` with any dimension. + size: `int` number for the first dimension of output Tensor. + constant_values: `int` value assigned to the paddings. + Returns: + `Tensor` with the first dimension padded to `size`. + """ + input_shape = input_tensor.get_shape().as_list() + padding_shape = [] + + # Computes the padding length on the first dimension, clip input tensor if it + # is longer than `size`. + input_length = tf.shape(input_tensor)[0] + input_length = tf.clip_by_value(input_length, 0, size) + input_tensor = input_tensor[:input_length] + + padding_length = tf.maximum(0, size - input_length) + padding_shape.append(padding_length) + + # Copies shapes of the rest of input shape dimensions. + for i in range(1, len(input_shape)): + padding_shape.append(tf.shape(input_tensor)[i]) + + # Pads input tensor to the fixed first dimension. + paddings = tf.cast(constant_values * tf.ones(padding_shape), + input_tensor.dtype) + padded_tensor = tf.concat([input_tensor, paddings], axis=0) + output_shape = input_shape + output_shape[0] = size + padded_tensor.set_shape(output_shape) + return padded_tensor + + +def normalize_image(image, + offset=(0.485, 0.456, 0.406), + scale=(0.229, 0.224, 0.225)): + """Normalizes the image to zero mean and unit variance.""" + with tf.name_scope('normalize_image'): + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + offset = tf.constant(offset) + offset = tf.expand_dims(offset, axis=0) + offset = tf.expand_dims(offset, axis=0) + image -= offset + + scale = tf.constant(scale) + scale = tf.expand_dims(scale, axis=0) + scale = tf.expand_dims(scale, axis=0) + image /= scale + return image def compute_padded_size(desired_size, stride): - """Compute the padded size given the desired size and the stride. - - The padded size will be the smallest rectangle, such that each dimension is - the smallest multiple of the stride which is larger than the desired - dimension. For example, if desired_size = (100, 200) and stride = 32, - the output padded_size = (128, 224). - - Args: - desired_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the target output image size. - stride: an integer, the stride of the backbone network. - - Returns: - padded_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the padded output image size. - """ - if isinstance(desired_size, list) or isinstance(desired_size, tuple): - padded_size = [int(math.ceil(d * 1.0 / stride) * stride) - for d in desired_size] - else: - padded_size = tf.cast( - tf.math.ceil( - tf.cast(desired_size, dtype=tf.float32) / stride) * stride, - tf.int32) - return padded_size + """Compute the padded size given the desired size and the stride. + The padded size will be the smallest rectangle, such that each dimension is + the smallest multiple of the stride which is larger than the desired + dimension. For example, if desired_size = (100, 200) and stride = 32, + the output padded_size = (128, 224). + Args: + desired_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the target output image size. + stride: an integer, the stride of the backbone network. 
+ Returns: + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. + """ + if isinstance(desired_size, list) or isinstance(desired_size, tuple): + padded_size = [int(math.ceil(d * 1.0 / stride) * stride) + for d in desired_size] + else: + padded_size = tf.cast( + tf.math.ceil( + tf.cast(desired_size, dtype=tf.float32) / stride) * stride, + tf.int32) + return padded_size def resize_and_crop_image(image, @@ -143,94 +120,88 @@ def resize_and_crop_image(image, aug_scale_max=1.0, seed=1, method=tf.image.ResizeMethod.BILINEAR): - """Resizes the input image to output size (RetinaNet style). - - Resize and pad images given the desired output size of the image and - stride size. - - Here are the preprocessing steps. - 1. For a given image, keep its aspect ratio and rescale the image to make it - the largest rectangle to be bounded by the rectangle specified by the - `desired_size`. - 2. Pad the rescaled image to the padded_size. - - Args: - image: a `Tensor` of shape [height, width, 3] representing an image. - desired_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the desired actual output image size. - padded_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the padded output image size. Padding will be applied - after scaling the image to the desired_size. - aug_scale_min: a `float` with range between [0, 1.0] representing minimum - random scale applied to desired_size for training scale jittering. - aug_scale_max: a `float` with range between [1.0, inf] representing maximum - random scale applied to desired_size for training scale jittering. - seed: seed for random scale jittering. - method: function to resize input image to scaled image. - - Returns: - output_image: `Tensor` of shape [height, width, 3] where [height, width] - equals to `output_size`. - image_info: a 2D `Tensor` that encodes the information of the image and the - applied preprocessing. It is in the format of - [[original_height, original_width], [desired_height, desired_width], - [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, - desired_width] is the actual scaled image size, and [y_scale, x_scale] is - the scaling factor, which is the ratio of - scaled dimension / original dimension. - """ - with tf.name_scope('resize_and_crop_image'): - image_size = tf.cast(tf.shape(image)[0:2], tf.float32) - - random_jittering = ( - isinstance(aug_scale_min, tf.Tensor) - or isinstance(aug_scale_max, tf.Tensor) - or not math.isclose(aug_scale_min, 1.0) - or not math.isclose(aug_scale_max, 1.0) - ) - - if random_jittering: - random_scale = tf.random.uniform( - [], aug_scale_min, aug_scale_max, seed=seed) - scaled_size = tf.round(random_scale * tf.cast(desired_size, tf.float32)) - else: - scaled_size = tf.cast(desired_size, tf.float32) - - scale = tf.minimum( - scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) - scaled_size = tf.round(image_size * scale) - - # Computes 2D image_scale. - image_scale = scaled_size / image_size - - # Selects non-zero random offset (x, y) if scaled image is larger than - # desired_size. 
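Note: `compute_padded_size` rounds each dimension up to the next multiple of the backbone stride, e.g. (100, 200) with stride 32 becomes (128, 224). A quick plain-Python check (sketch):

import math

def padded_size_sketch(desired_size, stride):
  # Round each dimension up to the nearest multiple of `stride`.
  return [int(math.ceil(d / stride) * stride) for d in desired_size]

print(padded_size_sketch((100, 200), 32))  # [128, 224]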
- if random_jittering: - max_offset = scaled_size - tf.cast(desired_size, tf.float32) - max_offset = tf.where( - tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) - offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed) - offset = tf.cast(offset, tf.int32) - else: - offset = tf.zeros((2,), tf.int32) - - scaled_image = tf.image.resize( - image, tf.cast(scaled_size, tf.int32), method=method) - - if random_jittering: - scaled_image = scaled_image[ - offset[0]:offset[0] + desired_size[0], - offset[1]:offset[1] + desired_size[1], :] - - output_image = tf.image.pad_to_bounding_box( - scaled_image, 0, 0, padded_size[0], padded_size[1]) - - image_info = tf.stack([ - image_size, - tf.cast(desired_size, dtype=tf.float32), - image_scale, - tf.cast(offset, tf.float32)]) - return output_image, image_info + """Resizes the input image to output size (RetinaNet style). + Resize and pad images given the desired output size of the image and + stride size. + Here are the preprocessing steps. + 1. For a given image, keep its aspect ratio and rescale the image to make it + the largest rectangle to be bounded by the rectangle specified by the + `desired_size`. + 2. Pad the rescaled image to the padded_size. + Args: + image: a `Tensor` of shape [height, width, 3] representing an image. + desired_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the desired actual output image size. + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. Padding will be applied + after scaling the image to the desired_size. + aug_scale_min: a `float` with range between [0, 1.0] representing minimum + random scale applied to desired_size for training scale jittering. + aug_scale_max: a `float` with range between [1.0, inf] representing maximum + random scale applied to desired_size for training scale jittering. + seed: seed for random scale jittering. + method: function to resize input image to scaled image. + Returns: + output_image: `Tensor` of shape [height, width, 3] where [height, width] + equals to `output_size`. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. It is in the format of + [[original_height, original_width], [desired_height, desired_width], + [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, + desired_width] is the actual scaled image size, and [y_scale, x_scale] is + the scaling factor, which is the ratio of + scaled dimension / original dimension. + """ + with tf.name_scope('resize_and_crop_image'): + image_size = tf.cast(tf.shape(image)[0:2], tf.float32) + desired_size = tf.cast(desired_size, tf.float32) + random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0) + + if random_jittering: + random_scale = tf.random.uniform( + [], aug_scale_min, aug_scale_max, seed=seed) + scaled_size = tf.round(random_scale * desired_size) + else: + scaled_size = desired_size + + scale = tf.minimum( + scaled_size[0] / image_size[0], scaled_size[1] / image_size[1]) + scaled_size = tf.round(image_size * scale) + + # Computes 2D image_scale. + image_scale = scaled_size / image_size + + # Selects non-zero random offset (x, y) if scaled image is larger than + # desired_size. 
+ if random_jittering: + max_offset = scaled_size - desired_size + max_offset = tf.where( + tf.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) + offset = max_offset * tf.random.uniform([2, ], 0, 1, seed=seed) + offset = tf.cast(offset, tf.int32) + else: + offset = tf.zeros((2,), tf.int32) + + scaled_image = tf.image.resize( + image, tf.cast(scaled_size, tf.int32), method=method) + + if random_jittering: + desired_size = tf.cast(desired_size, tf.int32) + scaled_image = scaled_image[ + offset[0]:offset[0] + desired_size[0], + offset[1]:offset[1] + desired_size[1], :] + desired_size = tf.cast(desired_size, tf.float32) + + output_image = tf.image.pad_to_bounding_box( + scaled_image, 0, 0, padded_size[0], padded_size[1]) + + desired_size = tf.cast(desired_size, tf.float32) + image_info = tf.stack([ + image_size, + desired_size, + image_scale, + tf.cast(offset, tf.float32)]) + return output_image, image_info def resize_and_crop_image_v2(image, @@ -241,255 +212,228 @@ def resize_and_crop_image_v2(image, aug_scale_max=1.0, seed=1, method=tf.image.ResizeMethod.BILINEAR): - """Resizes the input image to output size (Faster R-CNN style). - - Resize and pad images given the specified short / long side length and the - stride size. - - Here are the preprocessing steps. - 1. For a given image, keep its aspect ratio and first try to rescale the short - side of the original image to `short_side`. - 2. If the scaled image after 1 has a long side that exceeds `long_side`, keep - the aspect ratio and rescale the long side of the image to `long_side`. - 3. (Optional) Apply random jittering according to `aug_scale_min` and - `aug_scale_max`. By default this step is skipped. - 4. Pad the rescaled image to the padded_size. - - Args: - image: a `Tensor` of shape [height, width, 3] representing an image. - short_side: a scalar `Tensor` or `int` representing the desired short side - to be rescaled to. - long_side: a scalar `Tensor` or `int` representing the desired long side to - be rescaled to. - padded_size: a `Tensor` or `int` list/tuple of two elements representing - [height, width] of the padded output image size. - aug_scale_min: a `float` with range between [0, 1.0] representing minimum - random scale applied for training scale jittering. - aug_scale_max: a `float` with range between [1.0, inf] representing maximum - random scale applied for training scale jittering. - seed: seed for random scale jittering. - method: function to resize input image to scaled image. - - Returns: - output_image: `Tensor` of shape [height, width, 3] where [height, width] - equals to `output_size`. - image_info: a 2D `Tensor` that encodes the information of the image and the - applied preprocessing. It is in the format of - [[original_height, original_width], [desired_height, desired_width], - [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, - desired_width] is the actual scaled image size, and [y_scale, x_scale] is - the scaling factor, which is the ratio of - scaled dimension / original dimension. 
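Note: without jittering, `resize_and_crop_image` scales by min(desired_h / h, desired_w / w) so the resized image fits inside `desired_size` with its aspect ratio preserved, then pads to `padded_size`; the per-axis scale factors end up in the third row of `image_info`. A plain-Python sketch of the scale bookkeeping (example numbers only):

def retina_scale_sketch(image_hw, desired_hw):
  h, w = image_hw
  dh, dw = desired_hw
  scale = min(dh / h, dw / w)            # keep aspect ratio, fit inside target
  scaled_hw = (round(h * scale), round(w * scale))
  image_scale = (scaled_hw[0] / h, scaled_hw[1] / w)
  return scaled_hw, image_scale

print(retina_scale_sketch((400, 600), (640, 640)))
# ((427, 640), (1.0675, 1.0666...))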
- """ - with tf.name_scope('resize_and_crop_image_v2'): - image_size = tf.cast(tf.shape(image)[0:2], tf.float32) - - scale_using_short_side = ( - short_side / tf.math.minimum(image_size[0], image_size[1])) - scale_using_long_side = ( - long_side / tf.math.maximum(image_size[0], image_size[1])) - - scaled_size = tf.math.round(image_size * scale_using_short_side) - scaled_size = tf.where( - tf.math.greater( - tf.math.maximum(scaled_size[0], scaled_size[1]), long_side), - tf.math.round(image_size * scale_using_long_side), - scaled_size) - desired_size = scaled_size - - random_jittering = ( - isinstance(aug_scale_min, tf.Tensor) - or isinstance(aug_scale_max, tf.Tensor) - or not math.isclose(aug_scale_min, 1.0) - or not math.isclose(aug_scale_max, 1.0) - ) - - if random_jittering: - random_scale = tf.random.uniform( - [], aug_scale_min, aug_scale_max, seed=seed) - scaled_size = tf.math.round(random_scale * scaled_size) - - # Computes 2D image_scale. - image_scale = scaled_size / image_size - - # Selects non-zero random offset (x, y) if scaled image is larger than - # desired_size. - if random_jittering: - max_offset = scaled_size - desired_size - max_offset = tf.where( - tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) - offset = max_offset * tf.random.uniform([2,], 0, 1, seed=seed) - offset = tf.cast(offset, tf.int32) - else: - offset = tf.zeros((2,), tf.int32) - - scaled_image = tf.image.resize( - image, tf.cast(scaled_size, tf.int32), method=method) - - if random_jittering: - scaled_image = scaled_image[ - offset[0]:offset[0] + desired_size[0], - offset[1]:offset[1] + desired_size[1], :] - - output_image = tf.image.pad_to_bounding_box( - scaled_image, 0, 0, padded_size[0], padded_size[1]) - - image_info = tf.stack([ - image_size, - tf.cast(desired_size, dtype=tf.float32), - image_scale, - tf.cast(offset, tf.float32)]) - return output_image, image_info + """Resizes the input image to output size (Faster R-CNN style). + Resize and pad images given the specified short / long side length and the + stride size. + Here are the preprocessing steps. + 1. For a given image, keep its aspect ratio and first try to rescale the short + side of the original image to `short_side`. + 2. If the scaled image after 1 has a long side that exceeds `long_side`, keep + the aspect ratio and rescal the long side of the image to `long_side`. + 2. Pad the rescaled image to the padded_size. + Args: + image: a `Tensor` of shape [height, width, 3] representing an image. + short_side: a scalar `Tensor` or `int` representing the desired short side + to be rescaled to. + long_side: a scalar `Tensor` or `int` representing the desired long side to + be rescaled to. + padded_size: a `Tensor` or `int` list/tuple of two elements representing + [height, width] of the padded output image size. Padding will be applied + after scaling the image to the desired_size. + aug_scale_min: a `float` with range between [0, 1.0] representing minimum + random scale applied to desired_size for training scale jittering. + aug_scale_max: a `float` with range between [1.0, inf] representing maximum + random scale applied to desired_size for training scale jittering. + seed: seed for random scale jittering. + method: function to resize input image to scaled image. + Returns: + output_image: `Tensor` of shape [height, width, 3] where [height, width] + equals to `output_size`. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. 
It is in the format of + [[original_height, original_width], [desired_height, desired_width], + [y_scale, x_scale], [y_offset, x_offset]], where [desired_height, + desired_width] is the actual scaled image size, and [y_scale, x_scale] is + the scaling factor, which is the ratio of + scaled dimension / original dimension. + """ + with tf.name_scope('resize_and_crop_image_v2'): + image_size = tf.cast(tf.shape(image)[0:2], tf.float32) + + scale_using_short_side = ( + short_side / tf.math.minimum(image_size[0], image_size[1])) + scale_using_long_side = ( + long_side / tf.math.maximum(image_size[0], image_size[1])) + + scaled_size = tf.math.round(image_size * scale_using_short_side) + scaled_size = tf.where( + tf.math.greater( + tf.math.maximum(scaled_size[0], scaled_size[1]), long_side), + tf.math.round(image_size * scale_using_long_side), + scaled_size) + desired_size = scaled_size + + random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0) + + if random_jittering: + random_scale = tf.random.uniform( + [], aug_scale_min, aug_scale_max, seed=seed) + scaled_size = tf.math.round(random_scale * scaled_size) + + # Computes 2D image_scale. + image_scale = scaled_size / image_size + + # Selects non-zero random offset (x, y) if scaled image is larger than + # desired_size. + if random_jittering: + max_offset = scaled_size - desired_size + max_offset = tf.where( + tf.math.less(max_offset, 0), tf.zeros_like(max_offset), max_offset) + offset = max_offset * tf.random.uniform([2, ], 0, 1, seed=seed) + offset = tf.cast(offset, tf.int32) + else: + offset = tf.zeros((2,), tf.int32) + + scaled_image = tf.image.resize( + image, tf.cast(scaled_size, tf.int32), method=method) + + if random_jittering: + scaled_image = scaled_image[ + offset[0]:offset[0] + desired_size[0], + offset[1]:offset[1] + desired_size[1], :] + + output_image = tf.image.pad_to_bounding_box( + scaled_image, 0, 0, padded_size[0], padded_size[1]) + + image_info = tf.stack([ + image_size, + tf.cast(desired_size, dtype=tf.float32), + image_scale, + tf.cast(offset, tf.float32)]) + return output_image, image_info def resize_image( - image: tf.Tensor, - size: Union[Tuple[int, int], int], - max_size: Optional[int] = None, - method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR): - """Resize image with size and max_size. - - Args: - image: the image to be resized. - size: if list to tuple, resize to it. If scalar, we keep the same - aspect ratio and resize the short side to the value. - max_size: only used when size is a scalar. When the larger side is larger - than max_size after resized with size we used max_size to keep the aspect - ratio instead. - method: the method argument passed to tf.image.resize. - - Returns: - the resized image and image_info to be used for downstream processing. - image_info: a 2D `Tensor` that encodes the information of the image and the - applied preprocessing. It is in the format of - [[original_height, original_width], [resized_height, resized_width], - [y_scale, x_scale], [0, 0]], where [resized_height, resized_width] - is the actual scaled image size, and [y_scale, x_scale] is the - scaling factor, which is the ratio of - scaled dimension / original dimension. 
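Note: in the Faster R-CNN style resize above, the short side is scaled to `short_side` unless that would push the long side past `long_side`, in which case the long side is scaled to `long_side` instead. A plain-Python sketch of that rule (example numbers only):

def frcnn_scaled_size_sketch(image_hw, short_side, long_side):
  h, w = image_hw
  scale_short = short_side / min(h, w)
  scale_long = long_side / max(h, w)
  scaled = (round(h * scale_short), round(w * scale_short))
  if max(scaled) > long_side:
    scaled = (round(h * scale_long), round(w * scale_long))
  return scaled

print(frcnn_scaled_size_sketch((500, 1500), 800, 1333))  # long side caps it: (444, 1333)
print(frcnn_scaled_size_sketch((600, 800), 800, 1333))   # short side rules: (800, 1067)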
- """ - - def get_size_with_aspect_ratio(image_size, size, max_size=None): - h = image_size[0] - w = image_size[1] - if max_size is not None: - min_original_size = tf.cast(tf.math.minimum(w, h), dtype=tf.float32) - max_original_size = tf.cast(tf.math.maximum(w, h), dtype=tf.float32) - if max_original_size / min_original_size * size > max_size: - size = tf.cast( - tf.math.floor(max_size * min_original_size / max_original_size), - dtype=tf.int32) - else: - size = tf.cast(size, tf.int32) - - else: - size = tf.cast(size, tf.int32) - if (w <= h and w == size) or (h <= w and h == size): - return tf.stack([h, w]) - - if w < h: - ow = size - oh = tf.cast( - (tf.cast(size, dtype=tf.float32) * tf.cast(h, dtype=tf.float32) / - tf.cast(w, dtype=tf.float32)), - dtype=tf.int32) - else: - oh = size - ow = tf.cast( - (tf.cast(size, dtype=tf.float32) * tf.cast(w, dtype=tf.float32) / - tf.cast(h, dtype=tf.float32)), - dtype=tf.int32) - - return tf.stack([oh, ow]) - - def get_size(image_size, size, max_size=None): - if isinstance(size, (list, tuple)): - return size[::-1] - else: - return get_size_with_aspect_ratio(image_size, size, max_size) - - orignal_size = tf.shape(image)[0:2] - size = get_size(orignal_size, size, max_size) - rescaled_image = tf.image.resize( - image, tf.cast(size, tf.int32), method=method) - image_scale = size / orignal_size - image_info = tf.stack([ - tf.cast(orignal_size, dtype=tf.float32), - tf.cast(size, dtype=tf.float32), - tf.cast(image_scale, tf.float32), - tf.constant([0.0, 0.0], dtype=tf.float32) - ]) - return rescaled_image, image_info - - -def center_crop_image( - image, center_crop_fraction: float = CENTER_CROP_FRACTION): - """Center crop a square shape slice from the input image. - - It crops a square shape slice from the image. The side of the actual crop - is 224 / 256 = 0.875 of the short side of the original image. References: - [1] Very Deep Convolutional Networks for Large-Scale Image Recognition - https://arxiv.org/abs/1409.1556 - [2] Deep Residual Learning for Image Recognition - https://arxiv.org/abs/1512.03385 - - Args: - image: a Tensor of shape [height, width, 3] representing the input image. - center_crop_fraction: a float of ratio between the side of the cropped image - and the short side of the original image - - Returns: - cropped_image: a Tensor representing the center cropped image. - """ - with tf.name_scope('center_crop_image'): - image_size = tf.cast(tf.shape(image)[:2], dtype=tf.float32) - crop_size = ( - center_crop_fraction * tf.math.minimum(image_size[0], image_size[1])) - crop_offset = tf.cast((image_size - crop_size) / 2.0, dtype=tf.int32) - crop_size = tf.cast(crop_size, dtype=tf.int32) - cropped_image = image[ - crop_offset[0]:crop_offset[0] + crop_size, - crop_offset[1]:crop_offset[1] + crop_size, :] - return cropped_image - - -def center_crop_image_v2( - image_bytes, image_shape, center_crop_fraction: float = CENTER_CROP_FRACTION -): - """Center crop a square shape slice from the input image. - - It crops a square shape slice from the image. The side of the actual crop - is 224 / 256 = 0.875 of the short side of the original image. References: - [1] Very Deep Convolutional Networks for Large-Scale Image Recognition - https://arxiv.org/abs/1409.1556 - [2] Deep Residual Learning for Image Recognition - https://arxiv.org/abs/1512.03385 - - This is a faster version of `center_crop_image` which takes the original - image bytes and image size as the inputs, and partially decode the JPEG - bytes according to the center crop. 
- - Args: - image_bytes: a Tensor of type string representing the raw image bytes. - image_shape: a Tensor specifying the shape of the raw image. - center_crop_fraction: a float of ratio between the side of the cropped image - and the short side of the original image - - Returns: - cropped_image: a Tensor representing the center cropped image. - """ - with tf.name_scope('center_image_crop_v2'): - image_shape = tf.cast(image_shape, tf.float32) - crop_size = center_crop_fraction * tf.math.minimum( - image_shape[0], image_shape[1] - ) - crop_offset = tf.cast((image_shape - crop_size) / 2.0, dtype=tf.int32) - crop_size = tf.cast(crop_size, dtype=tf.int32) - crop_window = tf.stack( - [crop_offset[0], crop_offset[1], crop_size, crop_size]) - cropped_image = tf.image.decode_and_crop_jpeg( - image_bytes, crop_window, channels=3) - return cropped_image + image: tf.Tensor, + size: Union[Tuple[int, int], int], + max_size: Optional[int] = None, + method: tf.image.ResizeMethod = tf.image.ResizeMethod.BILINEAR): + """Resize image with size and max_size. + Args: + image: the image to be resized. + size: if list to tuple, resize to it. If scalar, we keep the same + aspect ratio and resize the short side to the value. + max_size: only used when size is a scalar. When the larger side is larger + than max_size after resized with size we used max_size to keep the aspect + ratio instead. + method: the method argument passed to tf.image.resize. + Returns: + the resized image and image_info to be used for downstream processing. + image_info: a 2D `Tensor` that encodes the information of the image and the + applied preprocessing. It is in the format of + [[original_height, original_width], [resized_height, resized_width], + [y_scale, x_scale], [0, 0]], where [resized_height, resized_width] + is the actual scaled image size, and [y_scale, x_scale] is the + scaling factor, which is the ratio of + scaled dimension / original dimension. 
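Note: for a scalar `size`, `resize_image` scales the short side to `size` and, when `max_size` is given, first shrinks `size` so the long side never exceeds `max_size`. The scalar path in plain Python (sketch; the list/tuple path simply uses the given size reversed):

def resize_target_sketch(image_hw, size, max_size=None):
  h, w = image_hw
  if max_size is not None and max(h, w) / min(h, w) * size > max_size:
    size = int(max_size * min(h, w) / max(h, w))
  if w < h:
    return (int(size * h / w), size)
  return (size, int(size * w / h))

print(resize_target_sketch((480, 640), 800))                 # (800, 1066)
print(resize_target_sketch((480, 640), 800, max_size=1000))  # (750, 1000)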
+ """ + + def get_size_with_aspect_ratio(image_size, size, max_size=None): + h = image_size[0] + w = image_size[1] + if max_size is not None: + min_original_size = tf.cast(tf.math.minimum(w, h), dtype=tf.float32) + max_original_size = tf.cast(tf.math.maximum(w, h), dtype=tf.float32) + if max_original_size / min_original_size * size > max_size: + size = tf.cast( + tf.math.floor(max_size * min_original_size / max_original_size), + dtype=tf.int32) + else: + size = tf.cast(size, tf.int32) + + else: + size = tf.cast(size, tf.int32) + if (w <= h and w == size) or (h <= w and h == size): + return tf.stack([h, w]) + + if w < h: + ow = size + oh = tf.cast( + (tf.cast(size, dtype=tf.float32) * tf.cast(h, dtype=tf.float32) / + tf.cast(w, dtype=tf.float32)), + dtype=tf.int32) + else: + oh = size + ow = tf.cast( + (tf.cast(size, dtype=tf.float32) * tf.cast(w, dtype=tf.float32) / + tf.cast(h, dtype=tf.float32)), + dtype=tf.int32) + + return tf.stack([oh, ow]) + + def get_size(image_size, size, max_size=None): + if isinstance(size, (list, tuple)): + return size[::-1] + else: + return get_size_with_aspect_ratio(image_size, size, max_size) + + orignal_size = tf.shape(image)[0:2] + size = get_size(orignal_size, size, max_size) + rescaled_image = tf.image.resize( + image, tf.cast(size, tf.int32), method=method) + image_scale = size / orignal_size + image_info = tf.stack([ + tf.cast(orignal_size, dtype=tf.float32), + tf.cast(size, dtype=tf.float32), + tf.cast(image_scale, tf.float32), + tf.constant([0.0, 0.0], dtype=tf.float32) + ]) + return rescaled_image, image_info + + +def center_crop_image(image): + """Center crop a square shape slice from the input image. + It crops a square shape slice from the image. The side of the actual crop + is 224 / 256 = 0.875 of the short side of the original image. References: + [1] Very Deep Convolutional Networks for Large-Scale Image Recognition + https://arxiv.org/abs/1409.1556 + [2] Deep Residual Learning for Image Recognition + https://arxiv.org/abs/1512.03385 + Args: + image: a Tensor of shape [height, width, 3] representing the input image. + Returns: + cropped_image: a Tensor representing the center cropped image. + """ + with tf.name_scope('center_crop_image'): + image_size = tf.cast(tf.shape(image)[:2], dtype=tf.float32) + crop_size = ( + CENTER_CROP_FRACTION * tf.math.minimum(image_size[0], image_size[1])) + crop_offset = tf.cast((image_size - crop_size) / 2.0, dtype=tf.int32) + crop_size = tf.cast(crop_size, dtype=tf.int32) + cropped_image = image[ + crop_offset[0]:crop_offset[0] + crop_size, + crop_offset[1]:crop_offset[1] + crop_size, :] + return cropped_image + + +def center_crop_image_v2(image_bytes, image_shape): + """Center crop a square shape slice from the input image. + It crops a square shape slice from the image. The side of the actual crop + is 224 / 256 = 0.875 of the short side of the original image. References: + [1] Very Deep Convolutional Networks for Large-Scale Image Recognition + https://arxiv.org/abs/1409.1556 + [2] Deep Residual Learning for Image Recognition + https://arxiv.org/abs/1512.03385 + This is a faster version of `center_crop_image` which takes the original + image bytes and image size as the inputs, and partially decode the JPEG + bytes according to the center crop. + Args: + image_bytes: a Tensor of type string representing the raw image bytes. + image_shape: a Tensor specifying the shape of the raw image. + Returns: + cropped_image: a Tensor representing the center cropped image. 
+ """ + with tf.name_scope('center_image_crop_v2'): + image_shape = tf.cast(image_shape, tf.float32) + crop_size = ( + CENTER_CROP_FRACTION * tf.math.minimum(image_shape[0], image_shape[1])) + crop_offset = tf.cast((image_shape - crop_size) / 2.0, dtype=tf.int32) + crop_size = tf.cast(crop_size, dtype=tf.int32) + crop_window = tf.stack( + [crop_offset[0], crop_offset[1], crop_size, crop_size]) + cropped_image = tf.image.decode_and_crop_jpeg( + image_bytes, crop_window, channels=3) + return cropped_image def random_crop_image(image, @@ -497,34 +441,32 @@ def random_crop_image(image, area_range=(0.08, 1.0), max_attempts=10, seed=1): - """Randomly crop an arbitrary shaped slice from the input image. - - Args: - image: a Tensor of shape [height, width, 3] representing the input image. - aspect_ratio_range: a list of floats. The cropped area of the image must - have an aspect ratio = width / height within this range. - area_range: a list of floats. The cropped reas of the image must contain - a fraction of the input image within this range. - max_attempts: the number of attempts at generating a cropped region of the - image of the specified constraints. After max_attempts failures, return - the entire image. - seed: the seed of the random generator. - - Returns: - cropped_image: a Tensor representing the random cropped image. Can be the - original image if max_attempts is exhausted. - """ - with tf.name_scope('random_crop_image'): - crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( - tf.shape(image), - tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), - seed=seed, - min_object_covered=area_range[0], - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts) - cropped_image = tf.slice(image, crop_offset, crop_size) - return cropped_image + """Randomly crop an arbitrary shaped slice from the input image. + Args: + image: a Tensor of shape [height, width, 3] representing the input image. + aspect_ratio_range: a list of floats. The cropped area of the image must + have an aspect ratio = width / height within this range. + area_range: a list of floats. The cropped reas of the image must contain + a fraction of the input image within this range. + max_attempts: the number of attempts at generating a cropped region of the + image of the specified constraints. After max_attempts failures, return + the entire image. + seed: the seed of the random generator. + Returns: + cropped_image: a Tensor representing the random cropped image. Can be the + original image if max_attempts is exhausted. + """ + with tf.name_scope('random_crop_image'): + crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( + tf.shape(image), + tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), + seed=seed, + min_object_covered=area_range[0], + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts) + cropped_image = tf.slice(image, crop_offset, crop_size) + return cropped_image def random_crop_image_v2(image_bytes, @@ -533,223 +475,149 @@ def random_crop_image_v2(image_bytes, area_range=(0.08, 1.0), max_attempts=10, seed=1): - """Randomly crop an arbitrary shaped slice from the input image. - - This is a faster version of `random_crop_image` which takes the original - image bytes and image size as the inputs, and partially decode the JPEG - bytes according to the generated crop. - - Args: - image_bytes: a Tensor of type string representing the raw image bytes. 
- image_shape: a Tensor specifying the shape of the raw image. - aspect_ratio_range: a list of floats. The cropped area of the image must - have an aspect ratio = width / height within this range. - area_range: a list of floats. The cropped reas of the image must contain - a fraction of the input image within this range. - max_attempts: the number of attempts at generating a cropped region of the - image of the specified constraints. After max_attempts failures, return - the entire image. - seed: the seed of the random generator. - - Returns: - cropped_image: a Tensor representing the random cropped image. Can be the - original image if max_attempts is exhausted. - """ - with tf.name_scope('random_crop_image_v2'): - crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( - image_shape, - tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), - seed=seed, - min_object_covered=area_range[0], - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts) - offset_y, offset_x, _ = tf.unstack(crop_offset) - crop_height, crop_width, _ = tf.unstack(crop_size) - crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width]) - cropped_image = tf.image.decode_and_crop_jpeg( - image_bytes, crop_window, channels=3) - return cropped_image + """Randomly crop an arbitrary shaped slice from the input image. + This is a faster version of `random_crop_image` which takes the original + image bytes and image size as the inputs, and partially decode the JPEG + bytes according to the generated crop. + Args: + image_bytes: a Tensor of type string representing the raw image bytes. + image_shape: a Tensor specifying the shape of the raw image. + aspect_ratio_range: a list of floats. The cropped area of the image must + have an aspect ratio = width / height within this range. + area_range: a list of floats. The cropped reas of the image must contain + a fraction of the input image within this range. + max_attempts: the number of attempts at generating a cropped region of the + image of the specified constraints. After max_attempts failures, return + the entire image. + seed: the seed of the random generator. + Returns: + cropped_image: a Tensor representing the random cropped image. Can be the + original image if max_attempts is exhausted. + """ + with tf.name_scope('random_crop_image_v2'): + crop_offset, crop_size, _ = tf.image.sample_distorted_bounding_box( + image_shape, + tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]), + seed=seed, + min_object_covered=area_range[0], + aspect_ratio_range=aspect_ratio_range, + area_range=area_range, + max_attempts=max_attempts) + offset_y, offset_x, _ = tf.unstack(crop_offset) + crop_height, crop_width, _ = tf.unstack(crop_size) + crop_window = tf.stack([offset_y, offset_x, crop_height, crop_width]) + cropped_image = tf.image.decode_and_crop_jpeg( + image_bytes, crop_window, channels=3) + return cropped_image def resize_and_crop_boxes(boxes, image_scale, output_size, offset): - """Resizes boxes to output size with scale and offset. - - Args: - boxes: `Tensor` of shape [N, 4] representing ground truth boxes. - image_scale: 2D float `Tensor` representing scale factors that apply to - [height, width] of input image. - output_size: 2D `Tensor` or `int` representing [height, width] of target - output image size. - offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled - boxes. - - Returns: - boxes: `Tensor` of shape [N, 4] representing the scaled boxes. 
- """ - with tf.name_scope('resize_and_crop_boxes'): - # Adjusts box coordinates based on image_scale and offset. - boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]) - boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2]) - # Clips the boxes. - boxes = box_ops.clip_boxes(boxes, output_size) - return boxes - - -def resize_and_crop_masks(masks, image_scale, output_size, offset): - """Resizes boxes to output size with scale and offset. - - Args: - masks: `Tensor` of shape [N, H, W, C] representing ground truth masks. - image_scale: 2D float `Tensor` representing scale factors that apply to - [height, width] of input image. - output_size: 2D `Tensor` or `int` representing [height, width] of target - output image size. - offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled - boxes. - - Returns: - masks: `Tensor` of shape [N, H, W, C] representing the scaled masks. - """ - with tf.name_scope('resize_and_crop_masks'): - mask_size = tf.cast(tf.shape(masks)[1:3], tf.float32) - num_channels = tf.shape(masks)[3] - # Pad masks to avoid empty mask annotations. - masks = tf.concat([ - tf.zeros([1, mask_size[0], mask_size[1], num_channels], - dtype=masks.dtype), masks - ], - axis=0) - - scaled_size = tf.cast(image_scale * mask_size, tf.int32) - scaled_masks = tf.image.resize( - masks, scaled_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) - offset = tf.cast(offset, tf.int32) - scaled_masks = scaled_masks[ - :, - offset[0]:offset[0] + output_size[0], - offset[1]:offset[1] + output_size[1], - :] - - output_masks = tf.image.pad_to_bounding_box( - scaled_masks, 0, 0, output_size[0], output_size[1]) - # Remove padding. - output_masks = output_masks[1::] - return output_masks + """Resizes boxes to output size with scale and offset. + Args: + boxes: `Tensor` of shape [N, 4] representing ground truth boxes. + image_scale: 2D float `Tensor` representing scale factors that apply to + [height, width] of input image. + output_size: 2D `Tensor` or `int` representing [height, width] of target + output image size. + offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled + boxes. + Returns: + boxes: `Tensor` of shape [N, 4] representing the scaled boxes. + """ + with tf.name_scope('resize_and_crop_boxes'): + # Adjusts box coordinates based on image_scale and offset. + boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2]) + boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2]) + # Clips the boxes. + boxes = box_ops.clip_boxes(boxes, output_size) + return boxes + + +def resize_and_crop_masks(masks, + image_scale, + output_size, + offset): + """Resizes boxes to output size with scale and offset. + Args: + masks: `Tensor` of shape [N, H, W, 1] representing ground truth masks. + image_scale: 2D float `Tensor` representing scale factors that apply to + [height, width] of input image. + output_size: 2D `Tensor` or `int` representing [height, width] of target + output image size. + offset: 2D `Tensor` representing top-left corner [y0, x0] to crop scaled + boxes. + Returns: + masks: `Tensor` of shape [N, H, W, 1] representing the scaled masks. + """ + with tf.name_scope('resize_and_crop_masks'): + mask_size = tf.cast(tf.shape(masks)[1:3], tf.float32) + + # Pad masks to avoid empty mask annotations. 
+ masks = tf.concat([tf.zeros([1, mask_size[0], mask_size[1], 1]), masks], axis=0) + + scaled_size = tf.cast(image_scale * mask_size, tf.int32) + + scaled_masks = tf.image.resize( + masks, scaled_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) + offset = tf.cast(offset, tf.int32) + scaled_masks = scaled_masks[ + :, + offset[0]:offset[0] + output_size[0], + offset[1]:offset[1] + output_size[1], + :] + output_masks = tf.image.pad_to_bounding_box( + scaled_masks, 0, 0, output_size[0], output_size[1]) + # Remove padding. + output_masks = output_masks[1::] + return output_masks def horizontal_flip_image(image): - """Flips image horizontally.""" - return tf.image.flip_left_right(image) + """Flips image horizontally.""" + return tf.image.flip_left_right(image) + + +def horizontal_flip_boxes(normalized_boxes): + """Flips normalized boxes horizontally.""" + ymin, xmin, ymax, xmax = tf.split( + value=normalized_boxes, num_or_size_splits=4, axis=1) + flipped_xmin = tf.subtract(1.0, xmax) + flipped_xmax = tf.subtract(1.0, xmin) + flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1) + return flipped_boxes def horizontal_flip_masks(masks): - """Flips masks horizontally.""" - return masks[:, :, ::-1] - - -def random_horizontal_flip( - image, normalized_boxes=None, masks=None, seed=1, prob=0.5 -): - """Randomly flips input image and bounding boxes horizontally.""" - with tf.name_scope('random_horizontal_flip'): - do_flip = tf.less(tf.random.uniform([], seed=seed), prob) - - image = tf.cond( - do_flip, - lambda: horizontal_flip_image(image), - lambda: image) - - if normalized_boxes is not None: - normalized_boxes = tf.cond( - do_flip, - lambda: horizontal_flip_boxes(normalized_boxes), - lambda: normalized_boxes) - - if masks is not None: - masks = tf.cond( - do_flip, - lambda: horizontal_flip_masks(masks), - lambda: masks) - - return image, normalized_boxes, masks - - -def random_horizontal_flip_with_roi( - image: tf.Tensor, - boxes: Optional[tf.Tensor] = None, - masks: Optional[tf.Tensor] = None, - roi_boxes: Optional[tf.Tensor] = None, - seed: int = 1 -) -> Tuple[tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor], - Optional[tf.Tensor]]: - """Randomly flips input image and bounding boxes horizontally. - - Extends preprocess_ops.random_horizontal_flip to also flip roi_boxes used - by ViLD. - - Args: - image: `tf.Tensor`, the image to apply the random flip. - boxes: `tf.Tensor` or `None`, boxes corresponding to the image. - masks: `tf.Tensor` or `None`, masks corresponding to the image. - roi_boxes: `tf.Tensor` or `None`, RoIs corresponding to the image. - seed: Seed for Tensorflow's random number generator. - - Returns: - image: `tf.Tensor`, flipped image. - boxes: `tf.Tensor` or `None`, flipped boxes corresponding to the image. - masks: `tf.Tensor` or `None`, flipped masks corresponding to the image. - roi_boxes: `tf.Tensor` or `None`, flipped RoIs corresponding to the image. 
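Note: `resize_and_crop_boxes` applies the same `image_scale` and crop `offset` recorded in `image_info` to [ymin, xmin, ymax, xmax] boxes and then clips them to the output size. The arithmetic, minus the final `box_ops.clip_boxes` call (sketch, TF 2.x assumed; numbers are an example):

import tensorflow as tf

boxes = tf.constant([[10., 20., 110., 220.]])   # [ymin, xmin, ymax, xmax]
image_scale = tf.constant([0.5, 0.5])           # [y_scale, x_scale]
offset = tf.constant([8., 16.])                 # crop offset [y0, x0]

scaled = boxes * tf.tile(image_scale[tf.newaxis, :], [1, 2])
shifted = scaled - tf.tile(offset[tf.newaxis, :], [1, 2])
print(shifted.numpy())  # [[-3. -6. 47. 94.]], then clipped to the output size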
- """ - with tf.name_scope('random_horizontal_flip'): - do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5) - - image = tf.cond(do_flip, lambda: horizontal_flip_image(image), - lambda: image) - - if boxes is not None: - boxes = tf.cond(do_flip, lambda: horizontal_flip_boxes(boxes), - lambda: boxes) - - if masks is not None: - masks = tf.cond(do_flip, lambda: horizontal_flip_masks(masks), - lambda: masks) - - if roi_boxes is not None: - roi_boxes = tf.cond(do_flip, lambda: horizontal_flip_boxes(roi_boxes), - lambda: roi_boxes) - - return image, boxes, masks, roi_boxes - - -def random_vertical_flip( - image, normalized_boxes=None, masks=None, seed=1, prob=0.5 -): - """Randomly flips input image and bounding boxes vertically.""" - with tf.name_scope('random_vertical_flip'): - do_flip = tf.less(tf.random.uniform([], seed=seed), prob) - - image = tf.cond( - do_flip, - lambda: tf.image.flip_up_down(image), - lambda: image) - - if normalized_boxes is not None: - normalized_boxes = tf.cond( - do_flip, - lambda: vertical_flip_boxes(normalized_boxes), - lambda: normalized_boxes) - - if masks is not None: - masks = tf.cond( - do_flip, - lambda: tf.image.flip_up_down(masks[..., None])[..., 0], - lambda: masks) - - return image, normalized_boxes, masks + """Flips masks horizontally.""" + return masks[:, :, ::-1] + + +def random_horizontal_flip(image, normalized_boxes=None, masks=None, seed=None): + """Randomly flips input image and bounding boxes.""" + with tf.name_scope('random_horizontal_flip'): + do_flip = tf.greater(tf.random.uniform([], seed=seed), 0.5) + + image = tf.cond( + do_flip, + lambda: horizontal_flip_image(image), + lambda: image) + + if normalized_boxes is not None: + normalized_boxes = tf.cond( + do_flip, + lambda: horizontal_flip_boxes(normalized_boxes), + lambda: normalized_boxes) + + if masks is not None: + masks = tf.cond( + do_flip, + lambda: horizontal_flip_masks(masks), + lambda: masks) + + return image, normalized_boxes, masks def color_jitter(image: tf.Tensor, @@ -757,237 +625,297 @@ def color_jitter(image: tf.Tensor, contrast: Optional[float] = 0., saturation: Optional[float] = 0., seed: Optional[int] = None) -> tf.Tensor: - """Applies color jitter to an image, similarly to torchvision`s ColorJitter. - - Args: - image (tf.Tensor): Of shape [height, width, 3] and type uint8. - brightness (float, optional): Magnitude for brightness jitter. Defaults to - 0. - contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. - saturation (float, optional): Magnitude for saturation jitter. Defaults to - 0. - seed (int, optional): Random seed. Defaults to None. - - Returns: - tf.Tensor: The augmented `image` of type uint8. - """ - image = tf.cast(image, dtype=tf.uint8) - image = random_brightness(image, brightness, seed=seed) - image = random_contrast(image, contrast, seed=seed) - image = random_saturation(image, saturation, seed=seed) - return image - - -def random_brightness(image: tf.Tensor, - brightness: float = 0., - seed: Optional[int] = None) -> tf.Tensor: - """Jitters brightness of an image. - - Args: + """Applies color jitter to an image, similarly to torchvision`s ColorJitter. + Args: image (tf.Tensor): Of shape [height, width, 3] and type uint8. brightness (float, optional): Magnitude for brightness jitter. Defaults to 0. + contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. + saturation (float, optional): Magnitude for saturation jitter. Defaults to + 0. seed (int, optional): Random seed. Defaults to None. 
- - Returns: + Returns: tf.Tensor: The augmented `image` of type uint8. - """ - assert brightness >= 0, '`brightness` must be positive' - brightness = tf.random.uniform([], - max(0, 1 - brightness), - 1 + brightness, - seed=seed, - dtype=tf.float32) - return augment.brightness(image, brightness) + """ + image = tf.cast(image, dtype=tf.uint8) + image = random_brightness(image, brightness, seed=seed) + image = random_contrast(image, contrast, seed=seed) + image = random_saturation(image, saturation, seed=seed) + return image + + +def random_brightness(image: tf.Tensor, + brightness: float = 0., + seed: Optional[int] = None) -> tf.Tensor: + """Jitters brightness of an image. + Args: + image (tf.Tensor): Of shape [height, width, 3] and type uint8. + brightness (float, optional): Magnitude for brightness jitter. Defaults to + 0. + seed (int, optional): Random seed. Defaults to None. + Returns: + tf.Tensor: The augmented `image` of type uint8. + """ + assert brightness >= 0, '`brightness` must be positive' + brightness = tf.random.uniform([], + max(0, 1 - brightness), + 1 + brightness, + seed=seed, + dtype=tf.float32) + return augment.brightness(image, brightness) def random_contrast(image: tf.Tensor, contrast: float = 0., seed: Optional[int] = None) -> tf.Tensor: - """Jitters contrast of an image, similarly to torchvision`s ColorJitter. - - Args: - image (tf.Tensor): Of shape [height, width, 3] and type uint8. - contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. - seed (int, optional): Random seed. Defaults to None. - - Returns: - tf.Tensor: The augmented `image` of type uint8. - """ - assert contrast >= 0, '`contrast` must be positive' - contrast = tf.random.uniform([], - max(0, 1 - contrast), - 1 + contrast, - seed=seed, - dtype=tf.float32) - return augment.contrast(image, contrast) + """Jitters contrast of an image, similarly to torchvision`s ColorJitter. + Args: + image (tf.Tensor): Of shape [height, width, 3] and type uint8. + contrast (float, optional): Magnitude for contrast jitter. Defaults to 0. + seed (int, optional): Random seed. Defaults to None. + Returns: + tf.Tensor: The augmented `image` of type uint8. + """ + assert contrast >= 0, '`contrast` must be positive' + contrast = tf.random.uniform([], + max(0, 1 - contrast), + 1 + contrast, + seed=seed, + dtype=tf.float32) + return augment.contrast(image, contrast) def random_saturation(image: tf.Tensor, saturation: float = 0., seed: Optional[int] = None) -> tf.Tensor: - """Jitters saturation of an image, similarly to torchvision`s ColorJitter. - - Args: - image (tf.Tensor): Of shape [height, width, 3] and type uint8. - saturation (float, optional): Magnitude for saturation jitter. Defaults to - 0. - seed (int, optional): Random seed. Defaults to None. - - Returns: - tf.Tensor: The augmented `image` of type uint8. - """ - assert saturation >= 0, '`saturation` must be positive' - saturation = tf.random.uniform([], - max(0, 1 - saturation), - 1 + saturation, - seed=seed, - dtype=tf.float32) - return _saturation(image, saturation) + """Jitters saturation of an image, similarly to torchvision`s ColorJitter. + Args: + image (tf.Tensor): Of shape [height, width, 3] and type uint8. + saturation (float, optional): Magnitude for saturation jitter. Defaults to + 0. + seed (int, optional): Random seed. Defaults to None. + Returns: + tf.Tensor: The augmented `image` of type uint8. 
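Note: each of `random_brightness`, `random_contrast` and `random_saturation` draws a multiplicative factor uniformly from [max(0, 1 - magnitude), 1 + magnitude] and hands it to the corresponding `augment` op. A sketch of the same sampling wired to stock tf.image adjusters instead of the repo's `augment` helpers (an approximation, not the patch's exact code path):

import tensorflow as tf

def jitter_factor(magnitude, seed=None):
  # Uniform multiplicative factor around 1.0, never negative.
  return tf.random.uniform([], max(0.0, 1.0 - magnitude), 1.0 + magnitude, seed=seed)

image = tf.zeros([64, 64, 3], dtype=tf.uint8)
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.adjust_saturation(image, jitter_factor(0.4))
image = tf.image.adjust_contrast(image, jitter_factor(0.4))
print(image.shape)  # (64, 64, 3)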
+ """ + assert saturation >= 0, '`saturation` must be positive' + saturation = tf.random.uniform([], + max(0, 1 - saturation), + 1 + saturation, + seed=seed, + dtype=tf.float32) + return _saturation(image, saturation) def _saturation(image: tf.Tensor, saturation: Optional[float] = 0.) -> tf.Tensor: - return augment.blend( - tf.repeat(tf.image.rgb_to_grayscale(image), 3, axis=-1), image, - saturation) + return augment.blend( + tf.repeat(tf.image.rgb_to_grayscale(image), 3, axis=-1), image, + saturation) -def random_crop_image_with_boxes_and_labels(img, boxes, labels, min_scale, +def random_crop_image_with_boxes_and_labels(img, + boxes, + labels, + min_scale, aspect_ratio_range, - min_overlap_params, max_retry): - """Crops a random slice from the input image. - - The function will correspondingly recompute the bounding boxes and filter out - outside boxes and their labels. - - References: - [1] End-to-End Object Detection with Transformers - https://arxiv.org/abs/2005.12872 - - The preprocessing steps: - 1. Sample a minimum IoU overlap. - 2. For each trial, sample the new image width, height, and top-left corner. - 3. Compute the IoUs of bounding boxes with the cropped image and retry if - the maximum IoU is below the sampled threshold. - 4. Find boxes whose centers are in the cropped image. - 5. Compute new bounding boxes in the cropped region and only select those - boxes' labels. - - Args: - img: a 'Tensor' of shape [height, width, 3] representing the input image. - boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding - boxes with (ymin, xmin, ymax, xmax). - labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. - min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random - scale variable. - aspect_ratio_range: a list of two 'float' that specifies the lower and upper - bound of the random aspect ratio. - min_overlap_params: a list of four 'float' representing the min value, max - value, step size, and offset for the minimum overlap sample. - max_retry: an 'int' representing the number of trials for cropping. If it is - exhausted, no cropping will be performed. - - Returns: - img: a Tensor representing the random cropped image. Can be the - original image if max_retry is exhausted. - boxes: a Tensor representing the bounding boxes in the cropped image. - labels: a Tensor representing the new bounding boxes' labels. 
- """ - - shape = tf.shape(img) - original_h = shape[0] - original_w = shape[1] - - minval, maxval, step, offset = min_overlap_params - - min_overlap = tf.math.floordiv( - tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset - - min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1) - - if min_overlap > 1.0: - return img, boxes, labels - - aspect_ratio_low = aspect_ratio_range[0] - aspect_ratio_high = aspect_ratio_range[1] - - for _ in tf.range(max_retry): - scale_h = tf.random.uniform([], min_scale, 1.0) - scale_w = tf.random.uniform([], min_scale, 1.0) - new_h = tf.cast( - scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32) - new_w = tf.cast( - scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32) - - # Aspect ratio has to be in the prespecified range - aspect_ratio = new_h / new_w - if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high: - continue - - left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32) - right = left + new_w - top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32) - bottom = top + new_h - - normalized_left = tf.cast( - left, dtype=tf.float32) / tf.cast( + min_overlap_params, + max_retry): + """Crops a random slice from the input image. + The function will correspondingly recompute the bounding boxes and filter out + outside boxes and their labels. + References: + [1] End-to-End Object Detection with Transformers + https://arxiv.org/abs/2005.12872 + The preprocessing steps: + 1. Sample a minimum IoU overlap. + 2. For each trial, sample the new image width, height, and top-left corner. + 3. Compute the IoUs of bounding boxes with the cropped image and retry if + the maximum IoU is below the sampled threshold. + 4. Find boxes whose centers are in the cropped image. + 5. Compute new bounding boxes in the cropped region and only select those + boxes' labels. + Args: + img: a 'Tensor' of shape [height, width, 3] representing the input image. + boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding + boxes with (ymin, xmin, ymax, xmax). + labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. + min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random + scale variable. + aspect_ratio_range: a list of two 'float' that specifies the lower and upper + bound of the random aspect ratio. + min_overlap_params: a list of four 'float' representing the min value, max + value, step size, and offset for the minimum overlap sample. + max_retry: an 'int' representing the number of trials for cropping. If it is + exhausted, no cropping will be performed. + Returns: + img: a Tensor representing the random cropped image. Can be the + original image if max_retry is exhausted. + boxes: a Tensor representing the bounding boxes in the cropped image. + labels: a Tensor representing the new bounding boxes' labels. 
+ """ + + shape = tf.shape(img) + original_h = shape[0] + original_w = shape[1] + + minval, maxval, step, offset = min_overlap_params + + min_overlap = tf.math.floordiv( + tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset + + min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1) + + if min_overlap > 1.0: + return img, boxes, labels + + aspect_ratio_low = aspect_ratio_range[0] + aspect_ratio_high = aspect_ratio_range[1] + + for _ in tf.range(max_retry): + scale_h = tf.random.uniform([], min_scale, 1.0) + scale_w = tf.random.uniform([], min_scale, 1.0) + new_h = tf.cast( + scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32) + new_w = tf.cast( + scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32) + + # Aspect ratio has to be in the prespecified range + aspect_ratio = new_h / new_w + if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high: + continue + + left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32) + right = left + new_w + top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32) + bottom = top + new_h + + normalized_left = tf.cast( + left, dtype=tf.float32) / tf.cast( original_w, dtype=tf.float32) - normalized_right = tf.cast( - right, dtype=tf.float32) / tf.cast( + normalized_right = tf.cast( + right, dtype=tf.float32) / tf.cast( original_w, dtype=tf.float32) - normalized_top = tf.cast( - top, dtype=tf.float32) / tf.cast( + normalized_top = tf.cast( + top, dtype=tf.float32) / tf.cast( original_h, dtype=tf.float32) - normalized_bottom = tf.cast( - bottom, dtype=tf.float32) / tf.cast( + normalized_bottom = tf.cast( + bottom, dtype=tf.float32) / tf.cast( original_h, dtype=tf.float32) - cropped_box = tf.expand_dims( - tf.stack([ - normalized_top, - normalized_left, - normalized_bottom, - normalized_right, - ]), - axis=0) - iou = box_ops.bbox_overlap( - tf.expand_dims(cropped_box, axis=0), - tf.expand_dims(boxes, axis=0)) # (1, 1, n_ground_truth) - iou = tf.squeeze(iou, axis=[0, 1]) - - # If not a single bounding box has a Jaccard overlap of greater than - # the minimum, try again - if tf.reduce_max(iou) < min_overlap: - continue - - centroids = box_ops.yxyx_to_cycxhw(boxes) - mask = tf.math.logical_and( - tf.math.logical_and(centroids[:, 0] > normalized_top, - centroids[:, 0] < normalized_bottom), - tf.math.logical_and(centroids[:, 1] > normalized_left, - centroids[:, 1] < normalized_right)) - # If not a single bounding box has its center in the crop, try again. 
- if tf.reduce_sum(tf.cast(mask, dtype=tf.int32)) > 0:
- indices = tf.squeeze(tf.where(mask), axis=1)
-
- filtered_boxes = tf.gather(boxes, indices)
-
- boxes = tf.clip_by_value(
- (filtered_boxes[..., :] * tf.cast(
- tf.stack([original_h, original_w, original_h, original_w]),
- dtype=tf.float32) -
- tf.cast(tf.stack([top, left, top, left]), dtype=tf.float32)) /
- tf.cast(tf.stack([new_h, new_w, new_h, new_w]), dtype=tf.float32),
- 0.0, 1.0)
-
- img = tf.image.crop_to_bounding_box(img, top, left, bottom - top,
- right - left)
-
- labels = tf.gather(labels, indices)
- break
-
- return img, boxes, labels
+ cropped_box = tf.expand_dims(
+ tf.stack([
+ normalized_top,
+ normalized_left,
+ normalized_bottom,
+ normalized_right,
+ ]),
+ axis=0)
+ iou = box_ops.bbox_overlap(
+ tf.expand_dims(cropped_box, axis=0),
+ tf.expand_dims(boxes, axis=0)) # (1, 1, n_ground_truth)
+ iou = tf.squeeze(iou, axis=[0, 1])
+
+ # If not a single bounding box has a Jaccard overlap of greater than
+ # the minimum, try again
+ if tf.reduce_max(iou) < min_overlap:
+ continue
+
+ centroids = box_ops.yxyx_to_cycxhw(boxes)
+ mask = tf.math.logical_and(
+ tf.math.logical_and(centroids[:, 0] > normalized_top,
+ centroids[:, 0] < normalized_bottom),
+ tf.math.logical_and(centroids[:, 1] > normalized_left,
+ centroids[:, 1] < normalized_right))
+ # If not a single bounding box has its center in the crop, try again.
+ if tf.reduce_sum(tf.cast(mask, dtype=tf.int32)) > 0:
+ indices = tf.squeeze(tf.where(mask), axis=1)
+
+ filtered_boxes = tf.gather(boxes, indices)
+
+ boxes = tf.clip_by_value(
+ (filtered_boxes[..., :] * tf.cast(
+ tf.stack([original_h, original_w, original_h, original_w]),
+ dtype=tf.float32) -
+ tf.cast(tf.stack([top, left, top, left]), dtype=tf.float32)) /
+ tf.cast(tf.stack([new_h, new_w, new_h, new_w]), dtype=tf.float32),
+ 0.0, 1.0)
+
+ img = tf.image.crop_to_bounding_box(img, top, left, bottom - top,
+ right - left)
+
+ labels = tf.gather(labels, indices)
+ break
+
+ return img, boxes, labels
+
+
+def random_crop_image_masks(img,
+ masks,
+ min_scale=0.3,
+ aspect_ratio_range=(0.5, 2.0),
+ min_overlap_params=(0.0, 1.4, 0.2, 0.1),
+ max_retry=50,
+ seed=None):
+ """Randomly crop the image and masks.
+ Args:
+ img: a 'Tensor' of shape [height, width, 3] representing the input image.
+ masks: a 'Tensor' of shape [N, height, width, C] representing N masks with C channels.
+ min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random
+ scale variable.
+ aspect_ratio_range: a list of two 'float' that specifies the lower and upper
+ bound of the random aspect ratio.
+ min_overlap_params: a list of four 'float' representing the min value, max
+ value, step size, and offset for the minimum overlap sample.
+ max_retry: an 'int' representing the number of trials for cropping. If it is
+ exhausted, no cropping will be performed.
+ seed: the random number seed of int, but could be None.
+ Returns:
+ img: a Tensor representing the random cropped image. Can be the
+ original image if max_retry is exhausted.
+ masks: a Tensor representing the masks in the cropped image.
+ """ + + shape = tf.shape(img) + + original_h = shape[0] + original_w = shape[1] + + minval, maxval, step, offset = min_overlap_params + + min_overlap = tf.math.floordiv( + tf.random.uniform([], minval=minval, maxval=maxval), step) * step - offset + + min_overlap = tf.clip_by_value(min_overlap, 0.0, 1.1) + + if min_overlap > 1.0: + return img, masks + + aspect_ratio_low = aspect_ratio_range[0] + aspect_ratio_high = aspect_ratio_range[1] + + for _ in tf.range(max_retry): + scale_h = tf.random.uniform([], min_scale, 1.0) + scale_w = tf.random.uniform([], min_scale, 1.0) + new_h = tf.cast( + scale_h * tf.cast(original_h, dtype=tf.float32), dtype=tf.int32) + new_w = tf.cast( + scale_w * tf.cast(original_w, dtype=tf.float32), dtype=tf.int32) + + # Aspect ratio has to be in the prespecified range + aspect_ratio = new_h / new_w + if aspect_ratio_low > aspect_ratio or aspect_ratio > aspect_ratio_high: + continue + + left = tf.random.uniform([], 0, original_w - new_w, dtype=tf.int32) + right = left + new_w + top = tf.random.uniform([], 0, original_h - new_h, dtype=tf.int32) + bottom = top + new_h + + img = tf.image.crop_to_bounding_box(img, top, left, bottom - top, right - left) + masks = tf.image.crop_to_bounding_box(masks, top, left, bottom - top, right - left) + break + + return img, masks def random_crop(image, @@ -998,36 +926,34 @@ def random_crop(image, min_overlap_params=(0.0, 1.4, 0.2, 0.1), max_retry=50, seed=None): - """Randomly crop the image and boxes, filtering labels. - - Args: - image: a 'Tensor' of shape [height, width, 3] representing the input image. - boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding - boxes with (ymin, xmin, ymax, xmax). - labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. - min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random - scale variable. - aspect_ratio_range: a list of two 'float' that specifies the lower and upper - bound of the random aspect ratio. - min_overlap_params: a list of four 'float' representing the min value, max - value, step size, and offset for the minimum overlap sample. - max_retry: an 'int' representing the number of trials for cropping. If it is - exhausted, no cropping will be performed. - seed: the random number seed of int, but could be None. - - Returns: - image: a Tensor representing the random cropped image. Can be the - original image if max_retry is exhausted. - boxes: a Tensor representing the bounding boxes in the cropped image. - labels: a Tensor representing the new bounding boxes' labels. - """ - with tf.name_scope('random_crop'): - do_crop = tf.greater(tf.random.uniform([], seed=seed), 0.5) - if do_crop: - return random_crop_image_with_boxes_and_labels(image, boxes, labels, - min_scale, - aspect_ratio_range, - min_overlap_params, - max_retry) - else: - return image, boxes, labels + """Randomly crop the image and boxes, filtering labels. + Args: + image: a 'Tensor' of shape [height, width, 3] representing the input image. + boxes: a 'Tensor' of shape [N, 4] representing the ground-truth bounding + boxes with (ymin, xmin, ymax, xmax). + labels: a 'Tensor' of shape [N,] representing the class labels of the boxes. + min_scale: a 'float' in [0.0, 1.0) indicating the lower bound of the random + scale variable. + aspect_ratio_range: a list of two 'float' that specifies the lower and upper + bound of the random aspect ratio. 
+ min_overlap_params: a list of four 'float' representing the min value, max + value, step size, and offset for the minimum overlap sample. + max_retry: an 'int' representing the number of trials for cropping. If it is + exhausted, no cropping will be performed. + seed: the random number seed of int, but could be None. + Returns: + image: a Tensor representing the random cropped image. Can be the + original image if max_retry is exhausted. + boxes: a Tensor representing the bounding boxes in the cropped image. + labels: a Tensor representing the new bounding boxes' labels. + """ + with tf.name_scope('random_crop'): + do_crop = tf.greater(tf.random.uniform([], seed=seed), 0.5) + if do_crop: + return random_crop_image_with_boxes_and_labels(image, boxes, labels, + min_scale, + aspect_ratio_range, + min_overlap_params, + max_retry) + else: + return image, boxes, labels diff --git a/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc b/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc index 197cb54f..8667ae9f 100644 Binary files a/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc b/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc index 4e006a0e..490680fc 100644 Binary files a/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/image_classification.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc b/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc index 98c62c16..6c11d2f3 100644 Binary files a/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/maskrcnn.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc b/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc index 1ecf9158..5f962117 100644 Binary files a/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/retinanet.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc b/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc index 11644322..afccd89b 100644 Binary files a/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/semantic_segmentation.cpython-39.pyc differ diff --git a/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc b/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc index 75452377..84d853c1 100644 Binary files a/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc and b/models/official/vision/tasks/__pycache__/video_classification.cpython-39.pyc differ diff --git a/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc b/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc index 3d37eedd..075df204 100644 Binary files a/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc 
index c6430325..b41932be 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc index 6907b982..d6ca6287 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/balanced_positive_negative_sampler.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc index fcc339b6..33d752cc 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/box_coder.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc index ce9146e4..2b8bb464 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/box_list.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc index 973a3631..361e44b8 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/faster_rcnn_box_coder.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc index d2ae1c32..d31b4929 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/minibatch_sampler.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc index fb62b127..ae07c0f8 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/ops.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc index cd77de6e..2134bc42 100644 Binary files a/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/shape_utils.cpython-39.pyc differ diff --git a/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc b/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc index 199d5727..203cf1bf 100644 Binary files 
a/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc and b/models/official/vision/utils/object_detection/__pycache__/visualization_utils.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/__init__.cpython-38.pyc b/models/orbit/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..7e2dda99 Binary files /dev/null and b/models/orbit/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/__init__.cpython-39.pyc b/models/orbit/__pycache__/__init__.cpython-39.pyc index 8a220520..d9c50c4a 100644 Binary files a/models/orbit/__pycache__/__init__.cpython-39.pyc and b/models/orbit/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/controller.cpython-38.pyc b/models/orbit/__pycache__/controller.cpython-38.pyc new file mode 100644 index 00000000..7b192fb4 Binary files /dev/null and b/models/orbit/__pycache__/controller.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/controller.cpython-39.pyc b/models/orbit/__pycache__/controller.cpython-39.pyc index 0c7592d2..0295d785 100644 Binary files a/models/orbit/__pycache__/controller.cpython-39.pyc and b/models/orbit/__pycache__/controller.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/runner.cpython-38.pyc b/models/orbit/__pycache__/runner.cpython-38.pyc new file mode 100644 index 00000000..d899b0b5 Binary files /dev/null and b/models/orbit/__pycache__/runner.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/runner.cpython-39.pyc b/models/orbit/__pycache__/runner.cpython-39.pyc index c455c86f..79c70d58 100644 Binary files a/models/orbit/__pycache__/runner.cpython-39.pyc and b/models/orbit/__pycache__/runner.cpython-39.pyc differ diff --git a/models/orbit/__pycache__/standard_runner.cpython-38.pyc b/models/orbit/__pycache__/standard_runner.cpython-38.pyc new file mode 100644 index 00000000..79911b09 Binary files /dev/null and b/models/orbit/__pycache__/standard_runner.cpython-38.pyc differ diff --git a/models/orbit/__pycache__/standard_runner.cpython-39.pyc b/models/orbit/__pycache__/standard_runner.cpython-39.pyc index 32dd1425..9c751972 100644 Binary files a/models/orbit/__pycache__/standard_runner.cpython-39.pyc and b/models/orbit/__pycache__/standard_runner.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/__init__.cpython-38.pyc b/models/orbit/actions/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..9cb968cc Binary files /dev/null and b/models/orbit/actions/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/__init__.cpython-39.pyc b/models/orbit/actions/__pycache__/__init__.cpython-39.pyc index 19216db4..14ee98fc 100644 Binary files a/models/orbit/actions/__pycache__/__init__.cpython-39.pyc and b/models/orbit/actions/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/conditional_action.cpython-38.pyc b/models/orbit/actions/__pycache__/conditional_action.cpython-38.pyc new file mode 100644 index 00000000..b92edbb7 Binary files /dev/null and b/models/orbit/actions/__pycache__/conditional_action.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc b/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc index 01031177..2bd898d2 100644 Binary files a/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc and b/models/orbit/actions/__pycache__/conditional_action.cpython-39.pyc differ diff --git 
a/models/orbit/actions/__pycache__/export_saved_model.cpython-38.pyc b/models/orbit/actions/__pycache__/export_saved_model.cpython-38.pyc new file mode 100644 index 00000000..c4e8662c Binary files /dev/null and b/models/orbit/actions/__pycache__/export_saved_model.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc b/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc index de695fe4..fcecd422 100644 Binary files a/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc and b/models/orbit/actions/__pycache__/export_saved_model.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/new_best_metric.cpython-38.pyc b/models/orbit/actions/__pycache__/new_best_metric.cpython-38.pyc new file mode 100644 index 00000000..8b97b036 Binary files /dev/null and b/models/orbit/actions/__pycache__/new_best_metric.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc b/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc index 27cb7016..bec62a9f 100644 Binary files a/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc and b/models/orbit/actions/__pycache__/new_best_metric.cpython-39.pyc differ diff --git a/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-38.pyc b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-38.pyc new file mode 100644 index 00000000..f0f01d76 Binary files /dev/null and b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-38.pyc differ diff --git a/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc index 5d16fb02..67ed9900 100644 Binary files a/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc and b/models/orbit/actions/__pycache__/save_checkpoint_if_preempted.cpython-39.pyc differ diff --git a/models/orbit/controller.py b/models/orbit/controller.py index 1f277231..054b9007 100644 --- a/models/orbit/controller.py +++ b/models/orbit/controller.py @@ -481,6 +481,7 @@ def _train_n_steps(self, num_steps: int): should_record = lambda: (self.global_step % self.summary_interval == 0) with tf.summary.record_if(should_record): num_steps_tensor = tf.convert_to_tensor(num_steps, dtype=tf.int32) + train_output = self.trainer.train(num_steps_tensor) # Verify that global_step was updated properly, then update current_step. 
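
Note on the augmentation changes earlier in this diff: the snippet below is a minimal usage sketch, not part of the patch. It assumes the touched crop functions live in official/vision/ops/preprocess_ops.py (their upstream location), that random_crop keeps its upstream defaults (min_scale=0.3, aspect_ratio_range=(0.5, 2.0)), and that the new random_crop_image_masks is exported from the same module; the tensor shapes and labels are dummies chosen only for illustration.

# Hypothetical usage sketch (not part of this patch). Assumes the crop
# functions sit in official/vision/ops/preprocess_ops.py and that
# random_crop keeps its upstream defaults; shapes and labels are dummies.
import tensorflow as tf

from official.vision.ops import preprocess_ops

image = tf.random.uniform([480, 640, 3], dtype=tf.float32)
# Normalized (ymin, xmin, ymax, xmax) ground-truth boxes and their labels.
boxes = tf.constant([[0.1, 0.1, 0.5, 0.4],
                     [0.3, 0.6, 0.9, 0.95]], dtype=tf.float32)
labels = tf.constant([1, 7], dtype=tf.int32)

# Half of the time random_crop returns a DETR-style min-IoU crop with the
# boxes re-normalized to the crop window and out-of-crop boxes/labels
# dropped; otherwise it returns the inputs unchanged.
image_c, boxes_c, labels_c = preprocess_ops.random_crop(image, boxes, labels)

# The new mask variant crops the image and an [N, H, W, C] stack of masks
# with the same randomly sampled window; there is no IoU filtering step
# because no boxes are involved.
masks = tf.cast(
    tf.random.uniform([2, 480, 640, 1], maxval=2, dtype=tf.int32), tf.float32)
image_m, masks_m = preprocess_ops.random_crop_image_masks(
    image, masks, min_scale=0.3, aspect_ratio_range=(0.5, 2.0))

The only behavioral difference between the two paths is the scoring: the box version retries until the sampled minimum IoU and the center-in-crop test are satisfied, while the mask version accepts the first window whose aspect ratio falls in range.
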
diff --git a/models/orbit/utils/__pycache__/__init__.cpython-38.pyc b/models/orbit/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 00000000..ef2b9c08 Binary files /dev/null and b/models/orbit/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/__init__.cpython-39.pyc b/models/orbit/utils/__pycache__/__init__.cpython-39.pyc index e78ca24c..9bc676c5 100644 Binary files a/models/orbit/utils/__pycache__/__init__.cpython-39.pyc and b/models/orbit/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/common.cpython-38.pyc b/models/orbit/utils/__pycache__/common.cpython-38.pyc new file mode 100644 index 00000000..30cf3e1f Binary files /dev/null and b/models/orbit/utils/__pycache__/common.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/common.cpython-39.pyc b/models/orbit/utils/__pycache__/common.cpython-39.pyc index 0fbdd179..ed758944 100644 Binary files a/models/orbit/utils/__pycache__/common.cpython-39.pyc and b/models/orbit/utils/__pycache__/common.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/epoch_helper.cpython-38.pyc b/models/orbit/utils/__pycache__/epoch_helper.cpython-38.pyc new file mode 100644 index 00000000..eb8f702d Binary files /dev/null and b/models/orbit/utils/__pycache__/epoch_helper.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc b/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc index 9c55ca26..a947937b 100644 Binary files a/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc and b/models/orbit/utils/__pycache__/epoch_helper.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/loop_fns.cpython-38.pyc b/models/orbit/utils/__pycache__/loop_fns.cpython-38.pyc new file mode 100644 index 00000000..7f28861c Binary files /dev/null and b/models/orbit/utils/__pycache__/loop_fns.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc b/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc index 81826598..d295ae07 100644 Binary files a/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc and b/models/orbit/utils/__pycache__/loop_fns.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager.cpython-38.pyc b/models/orbit/utils/__pycache__/summary_manager.cpython-38.pyc new file mode 100644 index 00000000..e5f0288c Binary files /dev/null and b/models/orbit/utils/__pycache__/summary_manager.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc b/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc index eb04aeb8..d84a14c9 100644 Binary files a/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc and b/models/orbit/utils/__pycache__/summary_manager.cpython-39.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager_interface.cpython-38.pyc b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-38.pyc new file mode 100644 index 00000000..1913b1af Binary files /dev/null and b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc index 0d31a863..813ecf16 100644 Binary files a/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc and b/models/orbit/utils/__pycache__/summary_manager_interface.cpython-39.pyc differ diff --git 
a/models/orbit/utils/__pycache__/tpu_summaries.cpython-38.pyc b/models/orbit/utils/__pycache__/tpu_summaries.cpython-38.pyc new file mode 100644 index 00000000..7999d962 Binary files /dev/null and b/models/orbit/utils/__pycache__/tpu_summaries.cpython-38.pyc differ diff --git a/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc b/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc index c540b0f2..bb5fb0b7 100644 Binary files a/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc and b/models/orbit/utils/__pycache__/tpu_summaries.cpython-39.pyc differ diff --git a/params.yaml b/params.yaml new file mode 100644 index 00000000..7503c753 --- /dev/null +++ b/params.yaml @@ -0,0 +1,187 @@ +runtime: + all_reduce_alg: null + batchnorm_spatial_persistent: false + dataset_num_private_threads: null + default_shard_dim: -1 + distribution_strategy: mirrored + enable_xla: false + gpu_thread_mode: null + loss_scale: null + mixed_precision_dtype: null + num_cores_per_replica: 1 + num_gpus: 0 + num_packs: 1 + per_gpu_thread_count: 0 + run_eagerly: false + task_index: -1 + tpu: null + tpu_enable_xla_dynamic_padder: null + worker_hosts: null +task: + allow_image_summary: false + annotation_file: null + differential_privacy_config: null + init_checkpoint: '' + init_checkpoint_modules: backbone + losses: + background_cls_weight: 0.1 + class_offset: 0 + l2_weight_decay: 0.0001 + lambda_box: 5.0 + lambda_cls: 1.0 + lambda_giou: 2.0 + model: + backbone: + resnet: + bn_trainable: false + depth_multiplier: 1.0 + model_id: 50 + replace_stem_max_pool: false + resnetd_shortcut: false + scale_stem: true + se_ratio: 0.0 + stem_type: v0 + stochastic_depth_drop_rate: 0.0 + type: resnet + backbone_endpoint_name: '5' + hidden_size: 256 + input_size: [1333, 1333, 3] + norm_activation: + activation: relu + norm_epsilon: 0.001 + norm_momentum: 0.99 + use_sync_bn: true + num_classes: 81 + num_decoder_layers: 6 + num_encoder_layers: 6 + num_queries: 100 + name: null + per_category_metrics: false + train_data: + apply_tf_data_service_before_batching: false + block_length: 1 + cache: false + cycle_length: null + deterministic: null + drop_remainder: true + enable_shared_tf_data_service_between_parallel_trainers: false + enable_tf_data_service: false + global_batch_size: 64 + input_path: '' + is_training: true + max_num_boxes: 100 + output_size: !!python/tuple + - 1333 + - 1333 + prefetch_buffer_size: null + resize_scales: !!python/tuple + - 480 + - 512 + - 544 + - 576 + - 608 + - 640 + - 672 + - 704 + - 736 + - 768 + - 800 + seed: null + sharding: true + shuffle_buffer_size: 1000 + tf_data_service_address: null + tf_data_service_job_name: null + tfds_as_supervised: false + tfds_data_dir: '' + tfds_name: coco/2017 + tfds_skip_decoding_feature: '' + tfds_split: train + trainer_id: null + validation_data: + apply_tf_data_service_before_batching: false + block_length: 1 + cache: false + cycle_length: null + deterministic: null + drop_remainder: false + enable_shared_tf_data_service_between_parallel_trainers: false + enable_tf_data_service: false + global_batch_size: 64 + input_path: '' + is_training: false + max_num_boxes: 100 + output_size: !!python/tuple + - 1333 + - 1333 + prefetch_buffer_size: null + resize_scales: !!python/tuple + - 480 + - 512 + - 544 + - 576 + - 608 + - 640 + - 672 + - 704 + - 736 + - 768 + - 800 + seed: null + sharding: true + shuffle_buffer_size: 100 + tf_data_service_address: null + tf_data_service_job_name: null + tfds_as_supervised: false + tfds_data_dir: '' + tfds_name: coco/2017 + 
tfds_skip_decoding_feature: '' + tfds_split: validation + trainer_id: null +trainer: + allow_tpu_summary: false + best_checkpoint_eval_metric: AP + best_checkpoint_export_subdir: best_ckpt + best_checkpoint_metric_comp: higher + checkpoint_interval: 10000 + continuous_eval_timeout: 3600 + eval_tf_function: true + eval_tf_while_loop: false + loss_upper_bound: 1000000.0 + max_to_keep: 1 + optimizer_config: + ema: null + learning_rate: + stepwise: + boundaries: [369600] + name: PiecewiseConstantDecay + offset: 0 + values: [0.0001, 1.0e-05] + type: stepwise + optimizer: + detr_adamw: + amsgrad: false + beta_1: 0.9 + beta_2: 0.999 + clipnorm: null + clipvalue: null + epsilon: 1.0e-07 + exclude_from_weight_decay: null + global_clipnorm: 0.1 + gradient_clip_norm: 0.0 + include_in_weight_decay: null + name: AdamWeightDecay + weight_decay_rate: 0.0001 + type: detr_adamw + warmup: + type: null + preemption_on_demand_checkpoint: true + recovery_begin_steps: 0 + recovery_max_trials: 0 + steps_per_loop: 10000 + summary_interval: 10000 + train_steps: 554400 + train_tf_function: true + train_tf_while_loop: true + validation_interval: 10000 + validation_steps: -1 + validation_summary_subdir: validation
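
Note on the new params.yaml above: as a quick sanity check of the schedule it encodes, the sketch below reads the file with plain PyYAML. This is an illustration only; the Model Garden normally materializes this configuration through its own hyperparams machinery, the !!python/tuple tags require a loader that can build Python tuples (UnsafeLoader here, so only for trusted files), and the COCO 2017 train size (about 118,287 images) is an external figure used purely for the arithmetic.

# Illustration only: inspect params.yaml with plain PyYAML.
import yaml

# UnsafeLoader is needed because the file uses !!python/tuple tags,
# which SafeLoader will not construct; use it on trusted files only.
with open('params.yaml') as f:
    params = yaml.load(f, Loader=yaml.UnsafeLoader)

batch_size = params['task']['train_data']['global_batch_size']   # 64
train_steps = params['trainer']['train_steps']                    # 554400
lr_boundary = params['trainer']['optimizer_config'][
    'learning_rate']['stepwise']['boundaries'][0]                 # 369600

# COCO 2017 train has about 118,287 images, so 64 * 554400 / 118287 is
# roughly 300 epochs, with the learning-rate drop near epoch 200,
# which matches the standard DETR training recipe.
print(batch_size, train_steps, lr_boundary,
      batch_size * train_steps / 118287)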