Binary file added spatial_server/.DS_Store
124 changes: 124 additions & 0 deletions spatial_server/confidence.py
@@ -0,0 +1,124 @@
import os
import pickle
from pathlib import Path

import torch
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm
import clip

from train_model import ProjectionHead

torch_hub_dir = Path('data/torch_hub')
torch_hub_dir.mkdir(parents=True, exist_ok=True)
torch.hub.set_dir(str(torch_hub_dir))

def encode_map(map_name, device, model, preprocess, train):
    if train:
        model.load_state_dict(torch.load(f"models/{map_name}_ViTL14-336px.pth", weights_only=True))
        # Create and load the projection head
        projection_head = ProjectionHead(model.visual.output_dim, 512, 256).to(device)
        projection_head.load_state_dict(torch.load(f"models/{map_name}_projection_head.pth", weights_only=True))

    map_path = f"Photos_split/train/{map_name}"

    image_list = []
    image_names = []

    for filename in os.listdir(map_path):
        if filename.lower().endswith(('.jpg', '.png', '.jpeg')):  # Adjust file extensions as needed
            image_path = os.path.join(map_path, filename)
            processed_image = preprocess(Image.open(image_path).convert('RGB'))
            image_list.append(processed_image)
            image_names.append(filename)

    image_batch = torch.stack(image_list, dim=0).to(device)

    with torch.no_grad():
        features = model.encode_image(image_batch)
        if train:
            projected_features = projection_head(features.float())
            features = projected_features / projected_features.norm(dim=-1, keepdim=True)

    # Save embeddings so each query only needs a single forward pass
    output_dir = "embeddings" if train else "untrain_embeddings"
    os.makedirs(output_dir, exist_ok=True)
    embeddings = {"image_names": image_names, "projected_features": features}
    torch.save(embeddings, f"{output_dir}/{map_name}_embeddings.pt")
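# The saved .pt file is a dict of the form
#   {"image_names": [str, ...], "projected_features": Tensor of shape (N, D)}
# where D is 256 after the projection head in trained mode, and CLIP's native
# ViT-L/14 embedding width (768) otherwise.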

def get_confidence(map_name, query_path, device, model, preprocess, train):
    if train:
        model.load_state_dict(torch.load(f"models/{map_name}_ViTL14-336px.pth", weights_only=True))
        # Create and load the projection head
        projection_head = ProjectionHead(model.visual.output_dim, 512, 256).to(device)
        projection_head.load_state_dict(torch.load(f"models/{map_name}_projection_head.pth", weights_only=True))

    embeddings_dir = "embeddings" if train else "untrain_embeddings"
    embeddings = torch.load(f"{embeddings_dir}/{map_name}_embeddings.pt")

    image = preprocess(Image.open(query_path).convert('RGB')).unsqueeze(0).to(device)
    with torch.no_grad():
        img_features = model.encode_image(image)

        if train:
            projected_img_features = projection_head(img_features.float())
            img_features = projected_img_features / projected_img_features.norm(dim=-1, keepdim=True)

    similarity = torch.nn.functional.cosine_similarity(img_features, embeddings["projected_features"])
    top1, idx = torch.topk(similarity, 1, dim=0)
    return top1.item(), embeddings["image_names"][idx.item()]
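# Note: the "confidence" returned above is simply the top-1 cosine similarity
# (a value in [-1, 1]) between the query and the stored map embeddings, so
# scores are directly comparable across rooms. A hypothetical call, where
# 'query.jpg' is a placeholder path:
#   score, best_match = get_confidence('POS 145', 'query.jpg', 'cpu',
#                                      model, preprocess, False)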


rooms = ['HOA 107', 'POS 145', 'POS 146', 'POS 147', 'POS 151', 'POS 153']
model, preprocess = clip.load("ViT-L/14@336px", device='cpu')

for room in rooms:
    confidences = {r: [] for r in rooms}
    print(f"\nProcessing room: {room}")

    image_files = [f for f in os.listdir(os.path.join('Photos_split/test', room)) if f.endswith(".jpg")]
    for f in tqdm(image_files, desc=f"{room} images", leave=False):
        img_path = os.path.join('Photos_split/test', room, f)
        for candidate_room in rooms:
            confidence, _ = get_confidence(candidate_room, img_path, 'cpu', model, preprocess, False)
            confidences[candidate_room].append(confidence)

    # Save the confidences dict as a pickle file
    os.makedirs('untrain_confidence_data', exist_ok=True)
    output_path = os.path.join('untrain_confidence_data', f'{room}_confidences.pkl')
    with open(output_path, 'wb') as out_file:
        pickle.dump(confidences, out_file)

    # --- Create a boxplot after finishing this room ---
    plt.figure(figsize=(10, 6))
    plt.boxplot(list(confidences.values()), labels=list(confidences.keys()))
    plt.title(f'Confidence scores for images in "{room}" vs. each candidate room')
    plt.xlabel('Candidate Room')
    plt.ylabel('Confidence Score')
    plt.grid(True)
    plt.tight_layout()
    os.makedirs('untrain_plots', exist_ok=True)
    plt.savefig(os.path.join('untrain_plots', f'{room}.png'))
    plt.close()


# for f in os.listdir('Photos_split/train'):
# encode_map(f, 'cpu', model, preprocess, False)
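# A sketch of the trained-model variant of the same evaluation, assuming
# train_model.py has already written models/<room>_*.pth for every room
# ('some_query.jpg' is a placeholder path):
#
#   for room in rooms:
#       encode_map(room, 'cpu', model, preprocess, True)  # writes embeddings/*.pt
#   confidence, best_match = get_confidence(
#       'POS 153', 'some_query.jpg', 'cpu', model, preprocess, True)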


257 changes: 257 additions & 0 deletions spatial_server/train_model.py
@@ -0,0 +1,257 @@
import os
import random
import shutil
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import clip
from PIL import Image
from tqdm import tqdm

import pillow_heif
pillow_heif.register_heif_opener()


torch_hub_dir = Path('data/torch_hub')
torch_hub_dir.mkdir(parents=True, exist_ok=True)
torch.hub.set_dir(str(torch_hub_dir))


class ContrastiveDataset(Dataset):
def __init__(self, root_dir, anchor_folder, transform=None):
self.root_dir = root_dir
self.anchor_folder = anchor_folder
self.transform = transform

        self.anchor_images = sorted([f for f in os.listdir(os.path.join(root_dir, anchor_folder))
                                     if f.lower().endswith(('.png', '.jpg', '.jpeg', '.heif', '.heic'))])

        self.other_folders = [f for f in os.listdir(root_dir)
                              if not f.startswith('.') and os.path.isdir(os.path.join(root_dir, f)) and f != anchor_folder]

        self.other_images = {}
        for folder in self.other_folders:
            self.other_images[folder] = [f for f in os.listdir(os.path.join(root_dir, folder))
                                         if f.lower().endswith(('.png', '.jpg', '.jpeg', '.heif', '.heic'))]

def __len__(self):
return len(self.anchor_images)

def __getitem__(self, idx):
# Anchor image (always from folder1)
anchor_img = self.anchor_images[idx]
anchor_path = os.path.join(self.root_dir, self.anchor_folder, anchor_img)
anchor = Image.open(anchor_path).convert('RGB')

# Positive image (different image from folder1)
positive_img = random.choice([img for img in self.anchor_images if img != anchor_img])
positive_path = os.path.join(self.root_dir, self.anchor_folder, positive_img)
positive = Image.open(positive_path).convert('RGB')

# Negative image (from a different folder)
negative_folder = random.choice(self.other_folders)
negative_img = random.choice(self.other_images[negative_folder])
negative_path = os.path.join(self.root_dir, negative_folder, negative_img)
negative = Image.open(negative_path).convert('RGB')

if self.transform:
anchor = self.transform(anchor)
positive = self.transform(positive)
negative = self.transform(negative)

return anchor, positive, negative


class CosineSimilarityContrastiveLoss(nn.Module):
def __init__(self, margin=0.6, negative_weight=1.2):
super().__init__()
self.margin = margin
self.negative_weight = negative_weight

    def forward(self, anchor, positive, negative):
        similarity_positive = nn.functional.cosine_similarity(anchor, positive, dim=1)
        similarity_negative = nn.functional.cosine_similarity(anchor, negative, dim=1)

        losses = torch.relu(self.margin - (similarity_positive - similarity_negative * self.negative_weight))
        return losses.mean()
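# For anchor a, positive p, and negative n, the loss above is
#   L = mean(max(0, margin - (cos(a, p) - negative_weight * cos(a, n))))
# a hinge-style triplet loss on cosine similarities: it is zero once the
# positive is at least `margin` more similar than the weighted negative, and
# negative_weight=1.2 penalizes high negative similarity slightly harder than
# a standard triplet formulation would.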


class ProjectionHead(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim):
super().__init__()
self.projection = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, output_dim)
)

def forward(self, x):
x = self.projection(x.float())
return x / x.norm(dim=-1, keepdim=True)
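# ProjectionHead maps CLIP image features (768-d for ViT-L/14) to a compact
# 256-d, L2-normalized embedding. A quick illustrative check:
#   head = ProjectionHead(input_dim=768, hidden_dim=512, output_dim=256)
#   z = head(torch.randn(4, 768))
#   z.shape          # torch.Size([4, 256])
#   z.norm(dim=-1)   # ~1.0 for every row (unit length)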


def train_model(map_name, num_epochs=20, batch_size=16, lr=1e-5):
    # Set device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load pre-trained CLIP model
    model, preprocess = clip.load("ViT-L/14@336px", device=device)

    # Request gradient checkpointing (note: the OpenAI clip package does not
    # read this attribute, so this is a no-op there; open_clip exposes
    # model.set_grad_checkpointing() for this purpose)
    model.visual.transformer.grad_checkpointing = True

    # Freeze most of the model, only fine-tune the last few layers
    for param in model.parameters():
        param.requires_grad = False

    # Unfreeze the last few layers (adjust as needed)
    for param in model.visual.transformer.resblocks[-2:].parameters():
        param.requires_grad = True

    # Create projection head
    projection_head = ProjectionHead(model.visual.output_dim, 512, 256).to(device)

    # Prepare dataset and dataloader
    root_dir = "Photos_split/train"  # One sub-folder per room
    # CLIP's preprocess already returns an unbatched (C, H, W) tensor, so it
    # can be used as the transform directly
    transform = preprocess
    anchor_folder = map_name

    dataset = ContrastiveDataset(root_dir, anchor_folder, transform=transform)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize contrastive loss
criterion = CosineSimilarityContrastiveLoss()

# Adjust optimizer to only update unfrozen parameters
params_to_optimize = [p for p in list(model.parameters()) + list(projection_head.parameters()) if p.requires_grad]
optimizer = optim.Adam(params_to_optimize, lr=lr)

for epoch in range(num_epochs):
model.train()
projection_head.train()
total_loss = 0
progress_bar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"Epoch {epoch+1}/{num_epochs}")
for i, batch in progress_bar:
anchor, positive, negative = [x.to(device) for x in batch]

            # Gradients must flow through the unfrozen resblocks, so the image
            # encoder is intentionally not wrapped in torch.no_grad() here
            anchor_features = model.encode_image(anchor)
            positive_features = model.encode_image(positive)
            negative_features = model.encode_image(negative)

            # ProjectionHead already L2-normalizes its output
            anchor_features = projection_head(anchor_features.float())
            positive_features = projection_head(positive_features.float())
            negative_features = projection_head(negative_features.float())

loss = criterion(anchor_features, positive_features, negative_features)

optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(params_to_optimize, max_norm=1.0)
optimizer.step()

total_loss += loss.item()
progress_bar.set_postfix(loss=loss.item())

avg_loss = total_loss / len(dataloader)
print(f"Epoch [{epoch+1}/{num_epochs}], Average Loss: {avg_loss:.4f}")

    # Save the fine-tuned model and projection head
    os.makedirs("models", exist_ok=True)
    torch.save(model.state_dict(), f"models/{map_name}_ViTL14-336px.pth")
    torch.save(projection_head.state_dict(), f"models/{map_name}_projection_head.pth")

def split_dataset(base_dir='Photos', output_dir='Photos_split', train_ratio=0.8):
# Ensure reproducibility
random.seed(42)

# Create train/test directories
train_dir = os.path.join(output_dir, 'train')
test_dir = os.path.join(output_dir, 'test')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

for folder_name in os.listdir(base_dir):
folder_path = os.path.join(base_dir, folder_name)
if not os.path.isdir(folder_path):
continue

        images = [f for f in os.listdir(folder_path)
                  if os.path.isfile(os.path.join(folder_path, f)) and not f.startswith('.')]
random.shuffle(images)

split_idx = int(len(images) * train_ratio)
train_images = images[:split_idx]
test_images = images[split_idx:]

# Create class folders
train_class_dir = os.path.join(train_dir, folder_name)
test_class_dir = os.path.join(test_dir, folder_name)
os.makedirs(train_class_dir, exist_ok=True)
os.makedirs(test_class_dir, exist_ok=True)

# Copy files
for img in train_images:
shutil.copy2(os.path.join(folder_path, img), os.path.join(train_class_dir, img))
for img in test_images:
shutil.copy2(os.path.join(folder_path, img), os.path.join(test_class_dir, img))

print("Dataset split complete.")

def convert_heic_to_jpg(folder):
for root, _, files in os.walk(folder):
for file in files:
if file.lower().endswith(".heic"):
heic_path = os.path.join(root, file)
jpg_path = os.path.splitext(heic_path)[0] + ".jpg"
try:
img = Image.open(heic_path)
img.save(jpg_path, "JPEG")
print(f"Converted: {file}")
except Exception as e:
print(f"Failed to convert {file}: {e}")

# convert_heic_to_jpg("Photos_split/test")


# train_model("POS 153")
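# A minimal end-to-end sketch of how these pieces fit together, assuming the
# raw per-room photos live in a local "Photos/" directory (split_dataset's
# default):
#
#   convert_heic_to_jpg("Photos")   # make iPhone HEIC captures loadable as JPEG
#   split_dataset()                 # Photos/ -> Photos_split/{train,test}
#   for room in sorted(os.listdir("Photos_split/train")):
#       train_model(room)           # one fine-tuned encoder + head per room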