@@ -28,20 +28,29 @@ class DINOV3Backbone(FeaturePyramidBackbone):
             embedding layer. Defaults to `0`.
         use_mask_token: bool. Whether to use a mask token in the embedding
             layer. Defaults to `True`.
+        hidden_activation: str or callable. Activation to use in the MLP.
+            Defaults to `"gelu"`.
         use_gated_mlp: bool. Whether to use Gated MLP layers. Defaults to
             `False`.
+        use_query_bias: bool. Whether to use a bias for the query projection.
+            Defaults to `True`.
+        use_key_bias: bool. Whether to use a bias for the key projection.
+            Defaults to `True`.
+        use_value_bias: bool. Whether to use a bias for the value projection.
+            Defaults to `True`.
+        use_proj_bias: bool. Whether to use a bias for the output projection.
+            Defaults to `True`.
+        use_mlp_bias: bool. Whether to use a bias for the dense layers in MLP.
+            Defaults to `True`.
         attention_dropout: float. The dropout rate for the attention
             probabilities. Defaults to `0.0`.
         drop_path_rate: float. The drop path rate to use. Defaults to `0.0`.
         image_shape: tuple. The input shape without the batch size. Defaults to
             `(518, 518, 3)`.
         rope_theta: float. The base period of the rotary position embeddings.
+            Defaults to `100.0`.
         apply_layernorm: bool. Whether to apply layer normalization to the
             outputs of each stage in the feature pyramid. Defaults to `False`.
-        query_bias: bool. Whether to use a bias for the query projection.
-        key_bias: bool. Whether to use a bias for the key projection.
-        value_bias: bool. Whether to use a bias for the value projection.
-        proj_bias: bool. Whether to use a bias for the output projection.
         data_format: `None` or str. If specified, either `"channels_last"` or
             `"channels_first"`. The ordering of the dimensions in the
             inputs. `"channels_last"` corresponds to inputs with shape
@@ -67,16 +76,19 @@ def __init__(
         layer_scale_init_value=1.0,
         num_register_tokens=4,
         use_mask_token=True,
+        hidden_activation="gelu",
         use_gated_mlp=False,
+        use_query_bias=True,
+        use_key_bias=True,
+        use_value_bias=True,
+        use_proj_bias=True,
+        use_mlp_bias=True,
         attention_dropout=0.0,
         drop_path_rate=0.0,
+        layer_norm_eps=1e-5,
         image_shape=(518, 518, 3),
-        rope_theta=10000.0,
+        rope_theta=100.0,
         apply_layernorm=False,
-        query_bias=True,
-        key_bias=True,
-        value_bias=True,
-        proj_bias=True,
         data_format=None,
         dtype=None,
         name=None,
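For reference, a minimal instantiation sketch using the renamed and newly added keyword arguments from this hunk. The import path and the required architecture arguments (patch_size, num_layers, hidden_dim, num_heads, intermediate_dim) and their values are assumptions for illustration; they are not taken from this diff.

# Sketch only: the import path and the architecture arguments marked
# "assumed" below are illustrative, not part of this change.
from keras_hub.models import DINOV3Backbone

backbone = DINOV3Backbone(
    patch_size=16,          # assumed
    num_layers=12,          # assumed
    hidden_dim=768,         # assumed
    num_heads=12,           # assumed
    intermediate_dim=3072,  # assumed
    # Renamed flags (formerly query_bias / key_bias / value_bias / proj_bias).
    use_query_bias=True,
    use_key_bias=True,
    use_value_bias=True,
    use_proj_bias=True,
    use_mlp_bias=True,
    # New arguments introduced by this change.
    hidden_activation="gelu",
    layer_norm_eps=1e-5,
    rope_theta=100.0,  # note the new default (previously 10000.0)
    image_shape=(518, 518, 3),
)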
@@ -110,18 +122,21 @@ def __init__(
             num_heads=num_heads,
             intermediate_dim=intermediate_dim,
             layer_scale_init_value=layer_scale_init_value,
+            hidden_activation=hidden_activation,
             use_gated_mlp=use_gated_mlp,
+            use_query_bias=use_query_bias,
+            use_key_bias=use_key_bias,
+            use_value_bias=use_value_bias,
+            use_proj_bias=use_proj_bias,
+            use_mlp_bias=use_mlp_bias,
             attention_dropout=attention_dropout,
             drop_path_rate=drop_path_rate,
-            query_bias=query_bias,
-            key_bias=key_bias,
-            value_bias=value_bias,
-            proj_bias=proj_bias,
+            layer_norm_eps=layer_norm_eps,
             dtype=dtype,
             name=f"{prefix}encoder",
         )
         self.layernorm = layers.LayerNormalization(
-            epsilon=1e-6, dtype=dtype, name=f"{prefix}layernorm"
+            epsilon=layer_norm_eps, dtype=dtype, name=f"{prefix}layernorm"
         )
 
         # === Functional Model ===
@@ -161,16 +176,19 @@ def __init__(
         self.layer_scale_init_value = float(layer_scale_init_value)
         self.num_register_tokens = int(num_register_tokens)
         self.use_mask_token = bool(use_mask_token)
+        self.hidden_activation = hidden_activation
         self.use_gated_mlp = bool(use_gated_mlp)
+        self.use_query_bias = bool(use_query_bias)
+        self.use_key_bias = bool(use_key_bias)
+        self.use_value_bias = bool(use_value_bias)
+        self.use_proj_bias = bool(use_proj_bias)
+        self.use_mlp_bias = bool(use_mlp_bias)
         self.attention_dropout = float(attention_dropout)
         self.drop_path_rate = float(drop_path_rate)
+        self.layer_norm_eps = float(layer_norm_eps)
         self.image_shape = image_shape
         self.rope_theta = rope_theta
         self.apply_layernorm = apply_layernorm
-        self.query_bias = query_bias
-        self.key_bias = key_bias
-        self.value_bias = value_bias
-        self.proj_bias = proj_bias
         self.pyramid_outputs = pyramid_outputs
 
     def get_config(self):
@@ -182,19 +200,22 @@ def get_config(self):
182200 "hidden_dim" : self .hidden_dim ,
183201 "num_heads" : self .num_heads ,
184202 "intermediate_dim" : self .intermediate_dim ,
185- "layer_scale_init_value" : self .layer_scale_init_value ,
186203 "num_register_tokens" : self .num_register_tokens ,
187204 "use_mask_token" : self .use_mask_token ,
205+ "layer_scale_init_value" : self .layer_scale_init_value ,
206+ "hidden_activation" : self .hidden_activation ,
188207 "use_gated_mlp" : self .use_gated_mlp ,
208+ "use_query_bias" : self .use_query_bias ,
209+ "use_key_bias" : self .use_key_bias ,
210+ "use_value_bias" : self .use_value_bias ,
211+ "use_proj_bias" : self .use_proj_bias ,
212+ "use_mlp_bias" : self .use_mlp_bias ,
189213 "attention_dropout" : self .attention_dropout ,
190214 "drop_path_rate" : self .drop_path_rate ,
215+ "layer_norm_eps" : self .layer_norm_eps ,
191216 "image_shape" : self .image_shape ,
192217 "rope_theta" : self .rope_theta ,
193218 "apply_layernorm" : self .apply_layernorm ,
194- "query_bias" : self .query_bias ,
195- "key_bias" : self .key_bias ,
196- "value_bias" : self .value_bias ,
197- "proj_bias" : self .proj_bias ,
198219 }
199220 )
200221 return config
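Because each new argument is stored on self and mirrored in get_config(), the standard Keras config round trip should preserve them. A brief sketch, reusing the hypothetical backbone from the earlier example:

# The new keys ("hidden_activation", "use_query_bias", ..., "layer_norm_eps")
# travel through get_config() / from_config().
config = backbone.get_config()
restored = DINOV3Backbone.from_config(config)
assert restored.layer_norm_eps == backbone.layer_norm_eps
assert restored.use_query_bias == backbone.use_query_bias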