@@ -74,26 +74,26 @@ def _cfg(url='', **kwargs):
     # Fiddling with configs / defaults / still pretraining
     'coatnet_pico_rw_224': _cfg(url=''),
     'coatnet_nano_rw_224': _cfg(
-        url='',
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_nano_rw_224_sw-f53093b4.pth',
         crop_pct=0.9),
     'coatnet_0_rw_224': _cfg(
-        url=''),
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_0_rw_224_sw-a6439706.pth'),
     'coatnet_1_rw_224': _cfg(
-        url=''
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_1_rw_224_sw-5cae1ea8.pth'
     ),
     'coatnet_2_rw_224': _cfg(url=''),

     # Highly experimental configs
     'coatnet_bn_0_rw_224': _cfg(
-        url='',
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_bn_0_rw_224_sw-c228e218.pth',
         mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
         crop_pct=0.95),
     'coatnet_rmlp_nano_rw_224': _cfg(
-        url='',
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_nano_rw_224_sw-bd1d51b3.pth',
         crop_pct=0.9),
     'coatnet_rmlp_0_rw_224': _cfg(url=''),
     'coatnet_rmlp_1_rw_224': _cfg(
-        url=''),
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_1_rw_224_sw-9051e6c3.pth'),
     'coatnet_nano_cc_224': _cfg(url=''),
     'coatnext_nano_rw_224': _cfg(url=''),

@@ -107,10 +107,12 @@ def _cfg(url='', **kwargs):

     # Experimental configs
     'maxvit_pico_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
-    'maxvit_nano_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxvit_nano_rw_256': _cfg(
+        url='https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_nano_rw_256_sw-3e790ce3.pth',
+        input_size=(3, 256, 256), pool_size=(8, 8)),
     'maxvit_tiny_rw_224': _cfg(url=''),
     'maxvit_tiny_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
-    'maxvit_tiny_cm_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
+    'maxvit_tiny_pm_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),
     'maxxvit_nano_rw_256': _cfg(url='', input_size=(3, 256, 256), pool_size=(8, 8)),

     # Trying to be like the MaxViT paper configs
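The checkpoint URLs added above are wired into the model default cfgs, so these variants can now be created with pretrained weights. A minimal usage sketch (assuming this branch of pytorch-image-models is installed and the release assets are reachable; not part of the diff):

```python
import torch
import timm

# Load one of the newly released CoAtNet checkpoints via its default cfg URL.
model = timm.create_model('coatnet_nano_rw_224', pretrained=True).eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))  # 224x224 per this model's default cfg
print(logits.shape)  # torch.Size([1, 1000])

# The 256px MaxViT variant also gains a checkpoint above; its cfg uses input_size=(3, 256, 256).
maxvit = timm.create_model('maxvit_nano_rw_256', pretrained=True).eval()
with torch.no_grad():
    print(maxvit(torch.randn(1, 3, 256, 256)).shape)  # torch.Size([1, 1000])
```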
@@ -131,7 +133,7 @@ class MaxxVitTransformerCfg:
     attn_bias: bool = True
     attn_drop: float = 0.
     proj_drop: float = 0.
-    pool_type: str = 'avg'
+    pool_type: str = 'avg2'
     rel_pos_type: str = 'bias'
     rel_pos_dim: int = 512  # for relative position types w/ MLP
     window_size: Tuple[int, int] = (7, 7)
@@ -153,7 +155,7 @@ class MaxxVitConvCfg:
     pre_norm_act: bool = False  # activation after pre-norm
     output_bias: bool = True  # bias for shortcut + final 1x1 projection conv
     stride_mode: str = 'dw'  # stride done via one of 'pool', '1x1', 'dw'
-    pool_type: str = 'avg'
+    pool_type: str = 'avg2'
     downsample_pool_type: str = 'avg2'
     attn_early: bool = False  # apply attn between conv2 and norm2, instead of after norm2
     attn_layer: str = 'se'
@@ -241,7 +243,7 @@ def _rw_coat_cfg(

 def _rw_max_cfg(
         stride_mode='dw',
-        pool_type='avg',
+        pool_type='avg2',
         conv_output_bias=False,
         conv_attn_ratio=1 / 16,
         conv_norm_layer='',
@@ -325,7 +327,6 @@ def _next_cfg(
         depths=(2, 3, 5, 2),
         stem_width=(32, 64),
         **_rw_max_cfg(  # using newer max defaults here
-            pool_type='avg2',
             conv_output_bias=True,
             conv_attn_ratio=0.25,
         ),
@@ -336,7 +337,6 @@ def _next_cfg(
         stem_width=(32, 64),
         **_rw_max_cfg(  # using newer max defaults here
             stride_mode='pool',
-            pool_type='avg2',
             conv_output_bias=True,
             conv_attn_ratio=0.25,
         ),
@@ -384,7 +384,6 @@ def _next_cfg(
         depths=(3, 4, 6, 3),
         stem_width=(32, 64),
         **_rw_max_cfg(
-            pool_type='avg2',
             conv_output_bias=True,
             conv_attn_ratio=0.25,
             rel_pos_type='mlp',
@@ -487,10 +486,10 @@ def _next_cfg(
         stem_width=(32, 64),
         **_rw_max_cfg(window_size=8),
     ),
-    maxvit_tiny_cm_256=MaxxVitCfg(
+    maxvit_tiny_pm_256=MaxxVitCfg(
         embed_dim=(64, 128, 256, 512),
         depths=(2, 2, 5, 2),
-        block_type=('CM',) * 4,
+        block_type=('PM',) * 4,
         stem_width=(32, 64),
         **_rw_max_cfg(window_size=8),
     ),
@@ -663,13 +662,15 @@ def __init__(
             bias: bool = True,
     ):
         super().__init__()
-        assert pool_type in ('max', 'avg', 'avg2')
+        assert pool_type in ('max', 'max2', 'avg', 'avg2')
         if pool_type == 'max':
             self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        elif pool_type == 'max2':
+            self.pool = nn.MaxPool2d(2)  # kernel_size == stride == 2
         elif pool_type == 'avg':
             self.pool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1, count_include_pad=False)
         else:
-            self.pool = nn.AvgPool2d(2)
+            self.pool = nn.AvgPool2d(2)  # kernel_size == stride == 2

         if dim != dim_out:
             self.expand = nn.Conv2d(dim, dim_out, 1, bias=bias)
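For context on the 'max2' option added to the downsample above: 'max' keeps the overlapping 3x3, stride-2 max pool, while 'max2' uses a non-overlapping 2x2 window, mirroring the existing 'avg' / 'avg2' pair. A standalone illustration in plain PyTorch (not part of the diff):

```python
import torch
import torch.nn as nn

x = torch.randn(1, 64, 56, 56)
pool_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 'max': overlapping 3x3 windows
pool_max2 = nn.MaxPool2d(2)                                  # 'max2': non-overlapping 2x2 windows

print(pool_max(x).shape)   # torch.Size([1, 64, 28, 28])
print(pool_max2(x).shape)  # torch.Size([1, 64, 28, 28])
```

Both halve the spatial resolution on even-sized inputs; the difference is whether pooling windows overlap.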
@@ -1073,7 +1074,7 @@ def forward(self, x):
         return x


-class CombinedPartitionAttention(nn.Module):
+class ParallelPartitionAttention(nn.Module):
     """ Experimental. Grid and Block partition + single FFN
     NxC tensor layout.
     """
@@ -1286,7 +1287,7 @@ def forward(self, x):
         return x


-class CombinedMaxxVitBlock(nn.Module):
+class ParallelMaxxVitBlock(nn.Module):
     """
     """

@@ -1309,7 +1310,7 @@ def __init__(
             self.conv = nn.Sequential(*convs)
         else:
             self.conv = conv_cls(dim, dim_out, stride=stride, cfg=conv_cfg, drop_path=drop_path)
-        self.attn = CombinedPartitionAttention(dim=dim_out, cfg=transformer_cfg, drop_path=drop_path)
+        self.attn = ParallelPartitionAttention(dim=dim_out, cfg=transformer_cfg, drop_path=drop_path)

     def init_weights(self, scheme=''):
         named_apply(partial(_init_transformer, scheme=scheme), self.attn)
@@ -1343,7 +1344,7 @@ def __init__(
         blocks = []
         for i, t in enumerate(block_types):
             block_stride = stride if i == 0 else 1
-            assert t in ('C', 'T', 'M', 'CM')
+            assert t in ('C', 'T', 'M', 'PM')
             if t == 'C':
                 conv_cls = ConvNeXtBlock if conv_cfg.block_type == 'convnext' else MbConvBlock
                 blocks += [conv_cls(
@@ -1372,8 +1373,8 @@ def __init__(
                     transformer_cfg=transformer_cfg,
                     drop_path=drop_path[i],
                 )]
-            elif t == 'CM':
-                blocks += [CombinedMaxxVitBlock(
+            elif t == 'PM':
+                blocks += [ParallelMaxxVitBlock(
                     in_chs,
                     out_chs,
                     stride=block_stride,
@@ -1415,7 +1416,6 @@ def __init__(
         self.norm1 = norm_act_layer(out_chs[0])
         self.conv2 = create_conv2d(out_chs[0], out_chs[1], kernel_size, stride=1)

-    @torch.jit.ignore
     def init_weights(self, scheme=''):
         named_apply(partial(_init_conv, scheme=scheme), self)

@@ -1659,8 +1659,8 @@ def maxvit_tiny_rw_256(pretrained=False, **kwargs):


 @register_model
-def maxvit_tiny_cm_256(pretrained=False, **kwargs):
-    return _create_maxxvit('maxvit_tiny_cm_256', pretrained=pretrained, **kwargs)
+def maxvit_tiny_pm_256(pretrained=False, **kwargs):
+    return _create_maxxvit('maxvit_tiny_pm_256', pretrained=pretrained, **kwargs)


 @register_model
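With the rename from 'cm' (combined) to 'pm' (parallel), the old entrypoint is gone and the new one takes its place. A quick sketch to confirm the registry (assuming this branch of timm is installed; note this change adds no pretrained checkpoint for maxvit_tiny_pm_256):

```python
import torch
import timm

# The old 'maxvit_tiny_cm_256' entrypoint should no longer be listed.
print(timm.list_models('maxvit_tiny_*'))

# Create the renamed model without pretrained weights and run a 256px input.
model = timm.create_model('maxvit_tiny_pm_256', pretrained=False).eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 256, 256))
print(out.shape)  # torch.Size([1, 1000])
```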