Skip to content
This repository was archived by the owner on Aug 6, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added dino/__init__.py
Empty file.
26 changes: 13 additions & 13 deletions eval_copy_detection.py → dino/eval_copy_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
from PIL import Image, ImageFile
import numpy as np

import utils
import vision_transformer as vits
from eval_knn import extract_features
import dino.utils
import dino.vision_transformer as vits
from dino.eval_knn import extract_features


class CopydaysDataset():
Expand Down Expand Up @@ -161,7 +161,7 @@ def extract_features(image_list, model, args):
num_workers=args.num_workers, drop_last=False,
sampler=torch.utils.data.DistributedSampler(tempdataset, shuffle=False))
features = None
for samples, index in utils.MetricLogger(delimiter=" ").log_every(data_loader, 10):
for samples, index in dino.utils.MetricLogger(delimiter=" ").log_every(data_loader, 10):
samples, index = samples.cuda(non_blocking=True), index.cuda(non_blocking=True)
feats = model.get_intermediate_layers(samples, n=1)[0].clone()

Expand Down Expand Up @@ -215,7 +215,7 @@ def extract_features(image_list, model, args):
parser.add_argument('--imsize', default=320, type=int, help='Image size (square image)')
parser.add_argument('--batch_size_per_gpu', default=16, type=int, help='Per-GPU batch-size')
parser.add_argument('--pretrained_weights', default='', type=str, help="Path to pretrained weights to evaluate.")
parser.add_argument('--use_cuda', default=True, type=utils.bool_flag)
parser.add_argument('--use_cuda', default=True, type=dino.utils.bool_flag)
parser.add_argument('--arch', default='vit_base', type=str, help='Architecture')
parser.add_argument('--patch_size', default=8, type=int, help='Patch resolution of the model.')
parser.add_argument("--checkpoint_key", default="teacher", type=str,
Expand All @@ -226,8 +226,8 @@ def extract_features(image_list, model, args):
parser.add_argument("--local_rank", default=0, type=int, help="Please ignore and do not set this argument.")
args = parser.parse_args()

utils.init_distributed_mode(args)
print("git:\n {}\n".format(utils.get_sha()))
dino.utils.init_distributed_mode(args)
print("git:\n {}\n".format(dino.utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
cudnn.benchmark = True

Expand All @@ -241,7 +241,7 @@ def extract_features(image_list, model, args):
if args.use_cuda:
model.cuda()
model.eval()
utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
dino.utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)

dataset = CopydaysDataset(args.data_path)

Expand All @@ -250,7 +250,7 @@ def extract_features(image_list, model, args):
queries = []
for q in dataset.query_blocks:
queries.append(extract_features(dataset.get_block(q), model, args))
if utils.get_rank() == 0:
if dino.utils.get_rank() == 0:
queries = torch.cat(queries)
print(f"Extraction of queries features done. Shape: {queries.shape}")

Expand All @@ -264,7 +264,7 @@ def extract_features(image_list, model, args):
print("Using distractors...")
list_distractors = [os.path.join(args.distractors_path, s) for s in os.listdir(args.distractors_path) if is_image_file(s)]
database.append(extract_features(list_distractors, model, args))
if utils.get_rank() == 0:
if dino.utils.get_rank() == 0:
database = torch.cat(database)
print(f"Extraction of database and distractors features done. Shape: {database.shape}")

Expand All @@ -273,20 +273,20 @@ def extract_features(image_list, model, args):
print(f"Extracting features on images from {args.whitening_path} for learning the whitening operator.")
list_whit = [os.path.join(args.whitening_path, s) for s in os.listdir(args.whitening_path) if is_image_file(s)]
features_for_whitening = extract_features(list_whit, model, args)
if utils.get_rank() == 0:
if dino.utils.get_rank() == 0:
# center
mean_feature = torch.mean(features_for_whitening, dim=0)
database -= mean_feature
queries -= mean_feature
pca = utils.PCA(dim=database.shape[-1], whit=0.5)
pca = dino.utils.PCA(dim=database.shape[-1], whit=0.5)
# compute covariance
cov = torch.mm(features_for_whitening.T, features_for_whitening) / features_for_whitening.shape[0]
pca.train_pca(cov.cpu().numpy())
database = pca.apply(database)
queries = pca.apply(queries)

# ============ Copy detection ... ============
if utils.get_rank() == 0:
if dino.utils.get_rank() == 0:
# l2 normalize the features
database = nn.functional.normalize(database, dim=1, p=2)
queries = nn.functional.normalize(queries, dim=1, p=2)
Expand Down
20 changes: 10 additions & 10 deletions eval_image_retrieval.py → dino/eval_image_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@
from PIL import Image, ImageFile
import numpy as np

import utils
import vision_transformer as vits
from eval_knn import extract_features
import dino.utils
import dino.vision_transformer as vits
from dino.eval_knn import extract_features


class OxfordParisDataset(torch.utils.data.Dataset):
Expand Down Expand Up @@ -83,10 +83,10 @@ def config_qimname(cfg, i):
parser = argparse.ArgumentParser('Image Retrieval on revisited Paris and Oxford')
parser.add_argument('--data_path', default='/path/to/revisited_paris_oxford/', type=str)
parser.add_argument('--dataset', default='roxford5k', type=str, choices=['roxford5k', 'rparis6k'])
parser.add_argument('--multiscale', default=False, type=utils.bool_flag)
parser.add_argument('--multiscale', default=False, type=dino.utils.bool_flag)
parser.add_argument('--imsize', default=224, type=int, help='Image size')
parser.add_argument('--pretrained_weights', default='', type=str, help="Path to pretrained weights to evaluate.")
parser.add_argument('--use_cuda', default=True, type=utils.bool_flag)
parser.add_argument('--use_cuda', default=True, type=dino.utils.bool_flag)
parser.add_argument('--arch', default='vit_small', type=str, help='Architecture')
parser.add_argument('--patch_size', default=16, type=int, help='Patch resolution of the model.')
parser.add_argument("--checkpoint_key", default="teacher", type=str,
Expand All @@ -97,8 +97,8 @@ def config_qimname(cfg, i):
parser.add_argument("--local_rank", default=0, type=int, help="Please ignore and do not set this argument.")
args = parser.parse_args()

utils.init_distributed_mode(args)
print("git:\n {}\n".format(utils.get_sha()))
dino.utils.init_distributed_mode(args)
print("git:\n {}\n".format(dino.utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
cudnn.benchmark = True

Expand Down Expand Up @@ -165,7 +165,7 @@ def config_qimname(cfg, i):
train_features = extract_features(model, data_loader_train, args.use_cuda, multiscale=args.multiscale)
query_features = extract_features(model, data_loader_query, args.use_cuda, multiscale=args.multiscale)

if utils.get_rank() == 0: # only rank 0 will work from now on
if dino.utils.get_rank() == 0: # only rank 0 will work from now on
# normalize features
train_features = nn.functional.normalize(train_features, dim=1, p=2)
query_features = nn.functional.normalize(query_features, dim=1, p=2)
Expand All @@ -187,15 +187,15 @@ def config_qimname(cfg, i):
g['ok'] = np.concatenate([gnd[i]['easy'], gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk']])
gnd_t.append(g)
mapM, apsM, mprM, prsM = utils.compute_map(ranks, gnd_t, ks)
mapM, apsM, mprM, prsM = dino.utils.compute_map(ranks, gnd_t, ks)
# search for hard
gnd_t = []
for i in range(len(gnd)):
g = {}
g['ok'] = np.concatenate([gnd[i]['hard']])
g['junk'] = np.concatenate([gnd[i]['junk'], gnd[i]['easy']])
gnd_t.append(g)
mapH, apsH, mprH, prsH = utils.compute_map(ranks, gnd_t, ks)
mapH, apsH, mprH, prsH = dino.utils.compute_map(ranks, gnd_t, ks)
print('>> {}: mAP M: {}, H: {}'.format(args.dataset, np.around(mapM*100, decimals=2), np.around(mapH*100, decimals=2)))
print('>> {}: mP@k{} M: {}, H: {}'.format(args.dataset, np.array(ks), np.around(mprM*100, decimals=2), np.around(mprH*100, decimals=2)))
dist.barrier()
20 changes: 10 additions & 10 deletions eval_knn.py → dino/eval_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
from torchvision import transforms as pth_transforms
from torchvision import models as torchvision_models

import utils
import vision_transformer as vits
import dino.utils
import dino.vision_transformer as vits


def extract_feature_pipeline(args):
Expand Down Expand Up @@ -68,7 +68,7 @@ def extract_feature_pipeline(args):
print(f"Architecture {args.arch} non supported")
sys.exit(1)
model.cuda()
utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
dino.utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
model.eval()

# ============ extract features ... ============
Expand All @@ -77,7 +77,7 @@ def extract_feature_pipeline(args):
print("Extracting features for val set...")
test_features = extract_features(model, data_loader_val, args.use_cuda)

if utils.get_rank() == 0:
if dino.utils.get_rank() == 0:
train_features = nn.functional.normalize(train_features, dim=1, p=2)
test_features = nn.functional.normalize(test_features, dim=1, p=2)

Expand All @@ -94,13 +94,13 @@ def extract_feature_pipeline(args):

@torch.no_grad()
def extract_features(model, data_loader, use_cuda=True, multiscale=False):
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger = dino.utils.MetricLogger(delimiter=" ")
features = None
for samples, index in metric_logger.log_every(data_loader, 10):
samples = samples.cuda(non_blocking=True)
index = index.cuda(non_blocking=True)
if multiscale:
feats = utils.multi_scale(samples, model)
feats = dino.utils.multi_scale(samples, model)
else:
feats = model(samples).clone()

Expand Down Expand Up @@ -196,7 +196,7 @@ def __getitem__(self, idx):
parser.add_argument('--temperature', default=0.07, type=float,
help='Temperature used in the voting coefficient')
parser.add_argument('--pretrained_weights', default='', type=str, help="Path to pretrained weights to evaluate.")
parser.add_argument('--use_cuda', default=True, type=utils.bool_flag,
parser.add_argument('--use_cuda', default=True, type=dino.utils.bool_flag,
help="Should we store the features on GPU? We recommend setting this to False if you encounter OOM")
parser.add_argument('--arch', default='vit_small', type=str, help='Architecture')
parser.add_argument('--patch_size', default=16, type=int, help='Patch resolution of the model.')
Expand All @@ -213,8 +213,8 @@ def __getitem__(self, idx):
parser.add_argument('--data_path', default='/path/to/imagenet/', type=str)
args = parser.parse_args()

utils.init_distributed_mode(args)
print("git:\n {}\n".format(utils.get_sha()))
dino.utils.init_distributed_mode(args)
print("git:\n {}\n".format(dino.utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
cudnn.benchmark = True

Expand All @@ -227,7 +227,7 @@ def __getitem__(self, idx):
# need to extract features !
train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
if dino.utils.get_rank() == 0:
if args.use_cuda:
train_features = train_features.cuda()
test_features = test_features.cuda()
Expand Down
30 changes: 15 additions & 15 deletions eval_linear.py → dino/eval_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
from torchvision import transforms as pth_transforms
from torchvision import models as torchvision_models

import utils
import vision_transformer as vits
import dino.utils
import dino.vision_transformer as vits


def eval_linear(args):
utils.init_distributed_mode(args)
print("git:\n {}\n".format(utils.get_sha()))
dino.utils.init_distributed_mode(args)
print("git:\n {}\n".format(dino.utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
cudnn.benchmark = True

Expand All @@ -54,7 +54,7 @@ def eval_linear(args):
model.cuda()
model.eval()
# load weights to evaluate
utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
dino.utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
print(f"Model {args.arch} built.")

linear_classifier = LinearClassifier(embed_dim, num_labels=args.num_labels)
Expand All @@ -77,7 +77,7 @@ def eval_linear(args):
)

if args.evaluate:
utils.load_pretrained_linear_weights(linear_classifier, args.arch, args.patch_size)
dino.utils.load_pretrained_linear_weights(linear_classifier, args.arch, args.patch_size)
test_stats = validate_network(val_loader, model, linear_classifier, args.n_last_blocks, args.avgpool_patchtokens)
print(f"Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%")
return
Expand All @@ -102,15 +102,15 @@ def eval_linear(args):
# set optimizer
optimizer = torch.optim.SGD(
linear_classifier.parameters(),
args.lr * (args.batch_size_per_gpu * utils.get_world_size()) / 256., # linear scaling rule
args.lr * (args.batch_size_per_gpu * dino.utils.get_world_size()) / 256., # linear scaling rule
momentum=0.9,
weight_decay=0, # we do not apply weight decay
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min=0)

# Optionally resume from a checkpoint
to_restore = {"epoch": 0, "best_acc": 0.}
utils.restart_from_checkpoint(
dino.utils.restart_from_checkpoint(
os.path.join(args.output_dir, "checkpoint.pth.tar"),
run_variables=to_restore,
state_dict=linear_classifier,
Expand All @@ -135,7 +135,7 @@ def eval_linear(args):
print(f'Max accuracy so far: {best_acc:.2f}%')
log_stats = {**{k: v for k, v in log_stats.items()},
**{f'test_{k}': v for k, v in test_stats.items()}}
if utils.is_main_process():
if dino.utils.is_main_process():
with (Path(args.output_dir) / "log.txt").open("a") as f:
f.write(json.dumps(log_stats) + "\n")
save_dict = {
Expand All @@ -152,8 +152,8 @@ def eval_linear(args):

def train(model, linear_classifier, optimizer, loader, epoch, n, avgpool):
linear_classifier.train()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
metric_logger = dino.utils.MetricLogger(delimiter=" ")
metric_logger.add_meter('lr', dino.utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
header = 'Epoch: [{}]'.format(epoch)
for (inp, target) in metric_logger.log_every(loader, 20, header):
# move to gpu
Expand Down Expand Up @@ -195,7 +195,7 @@ def train(model, linear_classifier, optimizer, loader, epoch, n, avgpool):
@torch.no_grad()
def validate_network(val_loader, model, linear_classifier, n, avgpool):
linear_classifier.eval()
metric_logger = utils.MetricLogger(delimiter=" ")
metric_logger = dino.utils.MetricLogger(delimiter=" ")
header = 'Test:'
for inp, target in metric_logger.log_every(val_loader, 20, header):
# move to gpu
Expand All @@ -216,9 +216,9 @@ def validate_network(val_loader, model, linear_classifier, n, avgpool):
loss = nn.CrossEntropyLoss()(output, target)

if linear_classifier.module.num_labels >= 5:
acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
acc1, acc5 = dino.utils.accuracy(output, target, topk=(1, 5))
else:
acc1, = utils.accuracy(output, target, topk=(1,))
acc1, = dino.utils.accuracy(output, target, topk=(1,))

batch_size = inp.shape[0]
metric_logger.update(loss=loss.item())
Expand Down Expand Up @@ -255,7 +255,7 @@ def forward(self, x):
parser = argparse.ArgumentParser('Evaluation with linear classification on ImageNet')
parser.add_argument('--n_last_blocks', default=4, type=int, help="""Concatenate [CLS] tokens
for the `n` last blocks. We use `n=4` when evaluating ViT-Small and `n=1` with ViT-Base.""")
parser.add_argument('--avgpool_patchtokens', default=False, type=utils.bool_flag,
parser.add_argument('--avgpool_patchtokens', default=False, type=dino.utils.bool_flag,
help="""Whether ot not to concatenate the global average pooled features to the [CLS] token.
We typically set this to False for ViT-Small and to True with ViT-Base.""")
parser.add_argument('--arch', default='vit_small', type=str, help='Architecture')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
from PIL import Image
from torchvision import transforms

import utils
import vision_transformer as vits
import dino.utils
import dino.vision_transformer as vits


@torch.no_grad()
Expand Down Expand Up @@ -264,14 +264,14 @@ def color_normalize(x, mean=[0.485, 0.456, 0.406], std=[0.228, 0.224, 0.225]):
parser.add_argument("--bs", type=int, default=6, help="Batch size, try to reduce if OOM")
args = parser.parse_args()

print("git:\n {}\n".format(utils.get_sha()))
print("git:\n {}\n".format(dino.utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))

# building network
model = vits.__dict__[args.arch](patch_size=args.patch_size, num_classes=0)
print(f"Model {args.arch} {args.patch_size}x{args.patch_size} built.")
model.cuda()
utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
dino.utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
for param in model.parameters():
param.requires_grad = False
model.eval()
Expand Down
2 changes: 1 addition & 1 deletion hubconf.py → dino/hubconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import torch
from torchvision.models.resnet import resnet50

import vision_transformer as vits
import dino.vision_transformer as vits

dependencies = ["torch", "torchvision"]

Expand Down
Loading