# cityscapes_pred.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os

import cv2

import argparse
import numpy as np
import torch
from torch import nn
from torchvision import transforms
from torchvision.models import mobilenet_v2
from cityscapes2 import Cityscapes

# Command-line interface: a CUDA opt-out switch and the RNG seed.
# NOTE(review): the description/help strings still say "Training" although this
# script only runs prediction — confirm whether they should be reworded.
parser = argparse.ArgumentParser(description='PyTorch Training Script')
parser.add_argument(
    '--no-cuda',
    action='store_true',
    default=False,
    help='disables CUDA training',
)
parser.add_argument(
    '--seed',
    type=int,
    default=1,
    metavar='S',
    help='random seed (default: 1)',
)

# Drop any memory held by the CUDA caching allocator before doing anything else.
torch.cuda.empty_cache()
args = parser.parse_args()

# Seed the default generator first, then decide whether CUDA may be used at all:
# it is enabled only when the user did not opt out AND a device is available.
torch.manual_seed(args.seed)
args.cuda = (not args.no_cuda) and torch.cuda.is_available()
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Grid of rectangular cells, each stored as [[x1, y1], [x2, y2]] (top-left and
# bottom-right corner). Cells are listed band by band, and inside a band row by
# row (top to bottom), left to right within a row:
#   * fine band:   64x32 cells,  x in [512, 1536), y in [320, 640)   -> 10 x 16 cells
#   * coarse band: 128x64 cells, x in [0, 2048),   y in [640, 1024)  ->  6 x 16 cells
# 256 cells in total; coordinates reach up to (2048, 1024).
cell_list = [
    [[left, top], [left + width, top + height]]
    for x_min, x_max, y_min, y_max, width, height in (
        (512, 1536, 320, 640, 64, 32),
        (0, 2048, 640, 1024, 128, 64),
    )
    for top in range(y_min, y_max, height)
    for left in range(x_min, x_max, width)
]
# Four groups of label ids handed to the dataset. Presumably group i maps to
# per-cell output channel i, with the last group acting as background —
# TODO confirm against cityscapes2.Cityscapes.
interested_classes = [(11, 12), (13, 14, 15, 16, 17, 18),
                      (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 25, 21, 20, 19), (0, 23, 22, 24)]
NumCell = 256  # number of cells
NumClass = 3  # number of classes except background class
root = "Datasets/Cityscapes"
save_name = 'Cityscapes_mobilenet'  # name of the model (checkpoint stem and output folder)

# MobileNetV2 whose final linear layer is replaced so that the network emits
# NumClass + 1 scores for every one of the NumCell grid cells.
model = mobilenet_v2()
model.classifier[1] = nn.Linear(1280, NumCell * (NumClass + 1))
# map_location='cpu' lets a checkpoint that was saved from a GPU be restored on
# a machine without CUDA; the freshly built model lives on the CPU anyway.
model.load_state_dict(torch.load('Models/' + save_name + '.pth', map_location='cpu'))

# NOTE(review): five names are listed although each cell only has
# NumClass + 1 = 4 scores — confirm which entries are actually meant to be used.
class_names = ["People", "Vehicle", "Other", "Road", "Background"]
# One rectangle colour per non-background class, indexed by class position.
color_box = [(31, 112, 255), (151, 157, 255), (56, 56, 255)]
val_dataset = Cityscapes(root, cell_list=cell_list, interested_classes=interested_classes, split='val',
                         target_type='semantic')


def _first_foreground_class(cell_scores, threshold=0.5):
    """Return the class index to draw for one cell, or None for background.

    A class is "active" when its score exceeds ``threshold``. The lowest active
    index wins; when nothing is active the highest-scoring class is used
    instead. Index ``NumClass`` is the background slot and is never drawn.
    """
    active = [i for i, score in enumerate(cell_scores) if score > threshold]
    chosen = active[0] if active else int(cell_scores.argmax())
    # Active indices are ascending, so the background slot can only come first
    # when it is the sole active entry.
    return None if chosen == NumClass else chosen


def pred_plot(frame, original, output):
    """Outline every grid cell predicted to contain a non-background class.

    Args:
        frame: image array to annotate; it is passed through
            ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)`` first and the converted
            copy is what gets drawn on and returned.
        original: unused; kept so existing callers keep working.
        output: 1-D tensor of per-cell scores laid out as ``NumClass + 1``
            consecutive values for each entry of ``cell_list`` (may live on
            any device).

    Returns:
        The converted frame with a 3-pixel rectangle drawn around each selected
        cell, coloured by ``color_box`` of the chosen class. Class-name text is
        not rendered.

    Raises:
        ValueError: if ``output`` holds fewer scores than the grid requires.

    NOTE(review): the caller writes the result with ``cv2.imwrite``; whether
    the channel swap above yields correct colours depends on the channel order
    of the dataset images — confirm.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # .cpu() keeps this working when the scores come straight from a CUDA model.
    scores = output.detach().cpu().numpy()

    per_cell = NumClass + 1
    required = len(cell_list) * per_cell
    if scores.size < required:
        raise ValueError(
            "output has {} scores but {} cells x {} classes need {}".format(
                scores.size, len(cell_list), per_cell, required))

    for cell_idx, rect in enumerate(cell_list):
        start = cell_idx * per_cell
        class_idx = _first_foreground_class(scores[start:start + per_cell])
        if class_idx is None:
            continue
        cv2.rectangle(frame, tuple(rect[0]), tuple(rect[1]),
                      color=color_box[class_idx], thickness=3)
    return frame


# Input transform applied to each resized image before it is fed to the model:
# only torchvision's ToTensor, with no normalisation step.
preprocess = transforms.Compose([transforms.ToTensor()])


def test():
    """Run the model over the validation set and save one annotated image per sample.

    Uses the module-level ``model``, ``val_dataset``, ``preprocess``, ``args``
    and ``path``. Inference runs on CUDA when ``args.cuda`` is set, otherwise on
    the CPU. Each sample is resized to 224x224 for the network, while the boxes
    are drawn on the full-size image and written to ``<path>/<filename>.jpg``.
    """
    # Honour the --no-cuda switch / CUDA availability computed at start-up.
    device = torch.device("cuda" if args.cuda else "cpu")
    model.to(device)
    model.eval()
    with torch.no_grad():
        for image, target, filename in val_dataset:
            resized = cv2.resize(image, (224, 224))
            input_batch = preprocess(resized).unsqueeze(0).to(device)
            # Bring the scores back to the CPU so the plotting code can turn
            # them into a NumPy array regardless of the inference device.
            scores = torch.sigmoid(model(input_batch))[0].cpu()
            target = torch.tensor(target, dtype=torch.float32).cpu()
            frame = pred_plot(image, target, scores)

            out_file = os.path.join(path, filename + ".jpg")
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(out_file, frame):
                print("warning: could not write " + out_file)


# Annotated images are written to ./<save_name>/ under the current working directory.
current_path = os.getcwd()
path = os.path.join(current_path, save_name)
# exist_ok=True replaces the check-then-create pair: no race between the
# existence test and the creation, and nothing happens if the folder exists.
os.makedirs(path, exist_ok=True)
test()