
Commit 9550d79

Update DINOV3 impls.
1 parent 9c08586

6 files changed: +423 -232 lines changed


keras_hub/src/models/dinov2/dinov2_layers.py

Lines changed: 3 additions & 1 deletion
@@ -502,7 +502,9 @@ def call(self, inputs, training=None):
 
     def get_config(self):
         config = super().get_config()
-        config.update({"hidden_dim": self.hidden_dim})
+        config.update(
+            {"hidden_dim": self.hidden_dim, "init_values": self.init_values}
+        )
         return config
 
     def compute_output_shape(self, input_shape):
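
Including `init_values` in the returned config is what lets the layer survive serialization; without it, a restored layer would silently fall back to the constructor default. A minimal sketch of the pattern, using a hypothetical LayerScale-style layer rather than the actual DINOV2 class:

import keras

class LayerScaleSketch(keras.layers.Layer):
    """Hypothetical layer scaling features by a learned per-channel gamma."""

    def __init__(self, hidden_dim, init_values=1.0, **kwargs):
        super().__init__(**kwargs)
        self.hidden_dim = hidden_dim
        self.init_values = init_values

    def build(self, input_shape):
        self.gamma = self.add_weight(
            shape=(self.hidden_dim,),
            initializer=keras.initializers.Constant(self.init_values),
            name="gamma",
        )

    def call(self, inputs):
        return inputs * self.gamma

    def get_config(self):
        config = super().get_config()
        # Without "init_values" here, a restored layer would quietly
        # reinitialize gamma from the default of 1.0.
        config.update(
            {"hidden_dim": self.hidden_dim, "init_values": self.init_values}
        )
        return config

layer = LayerScaleSketch(hidden_dim=16, init_values=1e-5)
restored = LayerScaleSketch.from_config(layer.get_config())
assert restored.init_values == 1e-5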

keras_hub/src/models/dinov3/dinov3_backbone.py

Lines changed: 44 additions & 23 deletions
@@ -28,20 +28,29 @@ class DINOV3Backbone(FeaturePyramidBackbone):
             embedding layer. Defaults to `0`.
         use_mask_token: bool. Whether to use a mask token in the embedding
             layer. Defaults to `True`.
+        hidden_activation: str or callable. Activation to use in the MLP.
+            Defaults to `"gelu"`.
         use_gated_mlp: bool. Whether to use Gated MLP layers. Defaults to
             `False`.
+        use_query_bias: bool. Whether to use a bias for the query projection.
+            Defaults to `True`.
+        use_key_bias: bool. Whether to use a bias for the key projection.
+            Defaults to `True`.
+        use_value_bias: bool. Whether to use a bias for the value projection.
+            Defaults to `True`.
+        use_proj_bias: bool. Whether to use a bias for the output projection.
+            Defaults to `True`.
+        use_mlp_bias: bool. Whether to use a bias for the dense layers in MLP.
+            Defaults to `True`.
         attention_dropout: float. The dropout rate for the attention
             probabilities. Defaults to `0.0`.
         drop_path_rate: float. The drop path rate to use. Defaults to `0.0`.
         image_shape: tuple. The input shape without the batch size. Defaults to
             `(518, 518, 3)`.
         rope_theta: float. The base period of the rotary position embeddings.
+            Defaults to `100.0`.
         apply_layernorm: bool. Whether to apply layer normalization to the
             outputs of each stage in the feature pyramid. Defaults to `False`.
-        query_bias: bool. Whether to use a bias for the query projection.
-        key_bias: bool. Whether to use a bias for the key projection.
-        value_bias: bool. Whether to use a bias for the value projection.
-        proj_bias: bool. Whether to use a bias for the output projection.
         data_format: `None` or str. If specified, either `"channels_last"` or
             `"channels_first"`. The ordering of the dimensions in the
             inputs. `"channels_last"` corresponds to inputs with shape
@@ -67,16 +76,19 @@ def __init__(
         layer_scale_init_value=1.0,
         num_register_tokens=4,
         use_mask_token=True,
+        hidden_activation="gelu",
         use_gated_mlp=False,
+        use_query_bias=True,
+        use_key_bias=True,
+        use_value_bias=True,
+        use_proj_bias=True,
+        use_mlp_bias=True,
         attention_dropout=0.0,
         drop_path_rate=0.0,
+        layer_norm_eps=1e-5,
         image_shape=(518, 518, 3),
-        rope_theta=10000.0,
+        rope_theta=100.0,
         apply_layernorm=False,
-        query_bias=True,
-        key_bias=True,
-        value_bias=True,
-        proj_bias=True,
         data_format=None,
         dtype=None,
         name=None,
@@ -110,18 +122,21 @@ def __init__(
             num_heads=num_heads,
             intermediate_dim=intermediate_dim,
             layer_scale_init_value=layer_scale_init_value,
+            hidden_activation=hidden_activation,
             use_gated_mlp=use_gated_mlp,
+            use_query_bias=use_query_bias,
+            use_key_bias=use_key_bias,
+            use_value_bias=use_value_bias,
+            use_proj_bias=use_proj_bias,
+            use_mlp_bias=use_mlp_bias,
             attention_dropout=attention_dropout,
             drop_path_rate=drop_path_rate,
-            query_bias=query_bias,
-            key_bias=key_bias,
-            value_bias=value_bias,
-            proj_bias=proj_bias,
+            layer_norm_eps=layer_norm_eps,
             dtype=dtype,
             name=f"{prefix}encoder",
         )
         self.layernorm = layers.LayerNormalization(
-            epsilon=1e-6, dtype=dtype, name=f"{prefix}layernorm"
+            epsilon=layer_norm_eps, dtype=dtype, name=f"{prefix}layernorm"
         )
 
         # === Functional Model ===
@@ -161,16 +176,19 @@ def __init__(
         self.layer_scale_init_value = float(layer_scale_init_value)
         self.num_register_tokens = int(num_register_tokens)
         self.use_mask_token = bool(use_mask_token)
+        self.hidden_activation = hidden_activation
         self.use_gated_mlp = bool(use_gated_mlp)
+        self.use_query_bias = bool(use_query_bias)
+        self.use_key_bias = bool(use_key_bias)
+        self.use_value_bias = bool(use_value_bias)
+        self.use_proj_bias = bool(use_proj_bias)
+        self.use_mlp_bias = bool(use_mlp_bias)
         self.attention_dropout = float(attention_dropout)
         self.drop_path_rate = float(drop_path_rate)
+        self.layer_norm_eps = float(layer_norm_eps)
         self.image_shape = image_shape
         self.rope_theta = rope_theta
         self.apply_layernorm = apply_layernorm
-        self.query_bias = query_bias
-        self.key_bias = key_bias
-        self.value_bias = value_bias
-        self.proj_bias = proj_bias
         self.pyramid_outputs = pyramid_outputs
 
     def get_config(self):
@@ -182,19 +200,22 @@ def get_config(self):
                 "hidden_dim": self.hidden_dim,
                 "num_heads": self.num_heads,
                 "intermediate_dim": self.intermediate_dim,
-                "layer_scale_init_value": self.layer_scale_init_value,
                 "num_register_tokens": self.num_register_tokens,
                 "use_mask_token": self.use_mask_token,
+                "layer_scale_init_value": self.layer_scale_init_value,
+                "hidden_activation": self.hidden_activation,
                 "use_gated_mlp": self.use_gated_mlp,
+                "use_query_bias": self.use_query_bias,
+                "use_key_bias": self.use_key_bias,
+                "use_value_bias": self.use_value_bias,
+                "use_proj_bias": self.use_proj_bias,
+                "use_mlp_bias": self.use_mlp_bias,
                 "attention_dropout": self.attention_dropout,
                 "drop_path_rate": self.drop_path_rate,
+                "layer_norm_eps": self.layer_norm_eps,
                 "image_shape": self.image_shape,
                 "rope_theta": self.rope_theta,
                 "apply_layernorm": self.apply_layernorm,
-                "query_bias": self.query_bias,
-                "key_bias": self.key_bias,
-                "value_bias": self.value_bias,
-                "proj_bias": self.proj_bias,
             }
         )
         return config
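
Taken together, the diff renames the projection-bias arguments to `use_*_bias`, adds `hidden_activation`, `use_mlp_bias`, and `layer_norm_eps`, and changes the `rope_theta` default. A hedged sketch of constructing a small backbone with the new signature and round-tripping it through its config; the hyperparameter values are illustrative only, and the dict-style `{"images": ...}` input simply follows the convention used in the test file:

import numpy as np
from keras_hub.src.models.dinov3.dinov3_backbone import DINOV3Backbone

# Illustrative hyperparameters only; real presets will differ.
backbone = DINOV3Backbone(
    patch_size=16,
    num_layers=2,
    hidden_dim=32,
    num_heads=2,
    intermediate_dim=128,
    layer_scale_init_value=1.0,
    num_register_tokens=4,
    hidden_activation="gelu",
    use_gated_mlp=False,
    use_query_bias=True,
    use_key_bias=True,
    use_value_bias=True,
    use_proj_bias=True,
    use_mlp_bias=True,
    layer_norm_eps=1e-5,
    rope_theta=100.0,
    image_shape=(64, 64, 3),
)
features = backbone({"images": np.ones((1, 64, 64, 3), dtype="float32")})

# The new arguments should survive a get_config / from_config round trip.
restored = DINOV3Backbone.from_config(backbone.get_config())
assert restored.layer_norm_eps == 1e-5
assert restored.use_mlp_bias is True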

keras_hub/src/models/dinov3/dinov3_backbone_test.py

Lines changed: 4 additions & 4 deletions
@@ -11,19 +11,19 @@
 class DINOV3BackboneTest(TestCase):
     def setUp(self):
         self.init_kwargs = {
-            "patch_size": 14,
+            "patch_size": 16,
             "num_layers": 2,
             "hidden_dim": 16,
             "num_heads": 2,
             "intermediate_dim": 16 * 4,
             "layer_scale_init_value": 1.0,
             "num_register_tokens": 4,
             "use_gated_mlp": False,
-            "image_shape": (70, 70, 3),
+            "image_shape": (64, 64, 3),
             "name": "dinov3_backbone",
         }
         self.input_data = {
-            "images": ops.ones((2, 70, 70, 3)),
+            "images": ops.ones((2, 64, 64, 3)),
         }
 
     def test_backbone_basics(self):

@@ -70,7 +70,7 @@ def test_position_embedding_interpolation(self):
         model.save_to_preset(path)
         restored_model = DINOV3Backbone.from_preset(
             path,
-            image_shape=(128, 128, 3),  # From 70 to 128.
+            image_shape=(128, 128, 3),  # From 64 to 128.
         )
         input_data = {
             "images": ops.ones((2, 128, 128, 3)),
