@@ -28,20 +28,29 @@ class DINOV3Backbone(FeaturePyramidBackbone):
             embedding layer. Defaults to `0`.
         use_mask_token: bool. Whether to use a mask token in the embedding
             layer. Defaults to `True`.
+        hidden_activation: str or callable. Activation to use in the MLP.
+            Defaults to `"gelu"`.
         use_gated_mlp: bool. Whether to use Gated MLP layers. Defaults to
             `False`.
+        use_query_bias: bool. Whether to use a bias for the query projection.
+            Defaults to `True`.
+        use_key_bias: bool. Whether to use a bias for the key projection.
+            Defaults to `True`.
+        use_value_bias: bool. Whether to use a bias for the value projection.
+            Defaults to `True`.
+        use_proj_bias: bool. Whether to use a bias for the output projection.
+            Defaults to `True`.
+        use_mlp_bias: bool. Whether to use a bias for the dense layers in MLP.
+            Defaults to `True`.
         attention_dropout: float. The dropout rate for the attention
             probabilities. Defaults to `0.0`.
         drop_path_rate: float. The drop path rate to use. Defaults to `0.0`.
         image_shape: tuple. The input shape without the batch size. Defaults to
             `(518, 518, 3)`.
         rope_theta: float. The base period of the rotary position embeddings.
+            Defaults to `100.0`.
         apply_layernorm: bool. Whether to apply layer normalization to the
             outputs of each stage in the feature pyramid. Defaults to `False`.
-        query_bias: bool. Whether to use a bias for the query projection.
-        key_bias: bool. Whether to use a bias for the key projection.
-        value_bias: bool. Whether to use a bias for the value projection.
-        proj_bias: bool. Whether to use a bias for the output projection.
         data_format: `None` or str. If specified, either `"channels_last"` or
             `"channels_first"`. The ordering of the dimensions in the
             inputs. `"channels_last"` corresponds to inputs with shape
@@ -67,16 +76,19 @@ def __init__(
         layer_scale_init_value=1.0,
         num_register_tokens=4,
         use_mask_token=True,
+        hidden_activation="gelu",
         use_gated_mlp=False,
+        use_query_bias=True,
+        use_key_bias=True,
+        use_value_bias=True,
+        use_proj_bias=True,
+        use_mlp_bias=True,
         attention_dropout=0.0,
         drop_path_rate=0.0,
+        layer_norm_eps=1e-5,
         image_shape=(518, 518, 3),
-        rope_theta=10000.0,
+        rope_theta=100.0,
         apply_layernorm=False,
-        query_bias=True,
-        key_bias=True,
-        value_bias=True,
-        proj_bias=True,
         data_format=None,
         dtype=None,
         name=None,
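For reference, a minimal instantiation sketch using the renamed and newly added keyword arguments from this hunk. The import path and the required architecture arguments (patch_size, num_layers, hidden_dim, num_heads, intermediate_dim) and their values are assumptions for illustration; they are not taken from this diff.

# Sketch only: the import path and the architecture arguments marked
# "assumed" below are illustrative, not part of this change.
from keras_hub.models import DINOV3Backbone

backbone = DINOV3Backbone(
    patch_size=16,          # assumed
    num_layers=12,          # assumed
    hidden_dim=768,         # assumed
    num_heads=12,           # assumed
    intermediate_dim=3072,  # assumed
    # Renamed flags (formerly query_bias / key_bias / value_bias / proj_bias).
    use_query_bias=True,
    use_key_bias=True,
    use_value_bias=True,
    use_proj_bias=True,
    use_mlp_bias=True,
    # New arguments introduced by this change.
    hidden_activation="gelu",
    layer_norm_eps=1e-5,
    rope_theta=100.0,  # note the new default (previously 10000.0)
    image_shape=(518, 518, 3),
)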
@@ -110,18 +122,21 @@ def __init__(
             num_heads=num_heads,
             intermediate_dim=intermediate_dim,
             layer_scale_init_value=layer_scale_init_value,
+            hidden_activation=hidden_activation,
             use_gated_mlp=use_gated_mlp,
+            use_query_bias=use_query_bias,
+            use_key_bias=use_key_bias,
+            use_value_bias=use_value_bias,
+            use_proj_bias=use_proj_bias,
+            use_mlp_bias=use_mlp_bias,
             attention_dropout=attention_dropout,
             drop_path_rate=drop_path_rate,
-            query_bias=query_bias,
-            key_bias=key_bias,
-            value_bias=value_bias,
-            proj_bias=proj_bias,
+            layer_norm_eps=layer_norm_eps,
             dtype=dtype,
             name=f"{prefix}encoder",
         )
         self.layernorm = layers.LayerNormalization(
-            epsilon=1e-6, dtype=dtype, name=f"{prefix}layernorm"
+            epsilon=layer_norm_eps, dtype=dtype, name=f"{prefix}layernorm"
         )
 
         # === Functional Model ===
@@ -161,16 +176,19 @@ def __init__(
         self.layer_scale_init_value = float(layer_scale_init_value)
         self.num_register_tokens = int(num_register_tokens)
         self.use_mask_token = bool(use_mask_token)
+        self.hidden_activation = hidden_activation
         self.use_gated_mlp = bool(use_gated_mlp)
+        self.use_query_bias = bool(use_query_bias)
+        self.use_key_bias = bool(use_key_bias)
+        self.use_value_bias = bool(use_value_bias)
+        self.use_proj_bias = bool(use_proj_bias)
+        self.use_mlp_bias = bool(use_mlp_bias)
         self.attention_dropout = float(attention_dropout)
         self.drop_path_rate = float(drop_path_rate)
+        self.layer_norm_eps = float(layer_norm_eps)
         self.image_shape = image_shape
         self.rope_theta = rope_theta
         self.apply_layernorm = apply_layernorm
-        self.query_bias = query_bias
-        self.key_bias = key_bias
-        self.value_bias = value_bias
-        self.proj_bias = proj_bias
         self.pyramid_outputs = pyramid_outputs
 
     def get_config(self):
@@ -182,19 +200,22 @@ def get_config(self):
182200 "hidden_dim" : self .hidden_dim ,
183201 "num_heads" : self .num_heads ,
184202 "intermediate_dim" : self .intermediate_dim ,
185- "layer_scale_init_value" : self .layer_scale_init_value ,
186203 "num_register_tokens" : self .num_register_tokens ,
187204 "use_mask_token" : self .use_mask_token ,
205+ "layer_scale_init_value" : self .layer_scale_init_value ,
206+ "hidden_activation" : self .hidden_activation ,
188207 "use_gated_mlp" : self .use_gated_mlp ,
208+ "use_query_bias" : self .use_query_bias ,
209+ "use_key_bias" : self .use_key_bias ,
210+ "use_value_bias" : self .use_value_bias ,
211+ "use_proj_bias" : self .use_proj_bias ,
212+ "use_mlp_bias" : self .use_mlp_bias ,
189213 "attention_dropout" : self .attention_dropout ,
190214 "drop_path_rate" : self .drop_path_rate ,
215+ "layer_norm_eps" : self .layer_norm_eps ,
191216 "image_shape" : self .image_shape ,
192217 "rope_theta" : self .rope_theta ,
193218 "apply_layernorm" : self .apply_layernorm ,
194- "query_bias" : self .query_bias ,
195- "key_bias" : self .key_bias ,
196- "value_bias" : self .value_bias ,
197- "proj_bias" : self .proj_bias ,
198219 }
199220 )
200221 return config
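Because each new argument is stored on self and mirrored in get_config(), the standard Keras config round trip should preserve them. A brief sketch, reusing the hypothetical backbone from the earlier example:

# The new keys ("hidden_activation", "use_query_bias", ..., "layer_norm_eps")
# travel through get_config() / from_config().
config = backbone.get_config()
restored = DINOV3Backbone.from_config(config)
assert restored.layer_norm_eps == backbone.layer_norm_eps
assert restored.use_query_bias == backbone.use_query_bias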