Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"image_grid",
"descriptor_qas",
"add_kois",
"add_bev",
"reasoning",
"system_prompt",
],
Expand All @@ -44,7 +45,7 @@
"--dataset_split",
help="The dataset split to use for training / evaluation.",
type=str,
choices=["train", "val"],
choices=["train", "val", "test"],
default="val",
)
parser.add_argument(
Expand Down Expand Up @@ -89,7 +90,7 @@
)
elif args.eval:
resize_image_size = get_resize_image_size(
resize_factor=resize_factor, grid="image_grid" in args.approach
resize_factor=resize_factor, grid="image_grid" in args.approach, bev="add_bev" in args.approach, front_cam="front_cam" in args.approach,
)
logger.debug(f"Using resize image size: {resize_image_size}")
if is_cuda():
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ gdown~=5.2.0
pre-commit~=4.2.0
peft~=0.15.2
trl~=0.18.1
polars==1.31.0
ultralytics==8.3.168
3 changes: 3 additions & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
drivelm_dir = data_dir / "drivelm"
nuscenes_dir = data_dir / "nuscenes"
grid_dir = nuscenes_dir / "samples" / "GRID"
bev_dir = nuscenes_dir / "samples" / "BEV"
drivelm_train_json = drivelm_dir / "v1_1_train_nus.json"
drivelm_val_json = drivelm_dir / "v1_1_val_nus_q_only.json"
drivelm_test_json = drivelm_dir / "v1_1_test_nus.json"
Expand All @@ -20,6 +21,8 @@
IMAGE_SIZE[0] * GRID[0],
IMAGE_SIZE[1] * GRID[1],
) # (height, width)
BEV_IMG_SIZE = (500, 500)
BEV_AND_FRONT_CAM_IMG_SIZE = (500, 1388)

GRID_POSITIONS = {
"CAM_FRONT_LEFT": (0, 0),
Expand Down
30 changes: 28 additions & 2 deletions src/data/basic_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

from src.constants import drivelm_dir
from src.data.create_image_grid_dataset import create_image_grid_dataset
from src.data.generate_bev import generate_bevs
from src.data.generate_descriptor_qas import (
generate_descriptor_qas,
)
from src.data.generate_reasoning_context import generate_reasoning_context
from src.data.get_sensor_calibration import get_calibration
from src.data.load_dataset import load_dataset
from src.data.message_formats import MessageFormat
from src.data.query_item import QueryItem
Expand Down Expand Up @@ -47,7 +49,9 @@ def __init__(
message_format: MessageFormat,
split="train",
add_augmented=False,
front_cam=False,
add_kois=False,
add_bev=False,
use_grid=False,
use_reasoning=False,
use_system_prompt=False,
Expand All @@ -58,8 +62,10 @@ def __init__(
):
self.message_format = message_format
self.split = split
self.front_cam = front_cam
self.use_reasoning = use_reasoning
self.use_grid = use_grid
self.add_bev = add_bev
self.resize_factor = resize_factor
self.system_prompt_provider = (
SystemPromptProvider(config_path=system_prompt_config_path)
Expand All @@ -70,13 +76,19 @@ def __init__(
data = load_dataset(split)

if split == "train":
if add_bev:
data = get_calibration(data)
data = generate_bevs(data, front_cam=front_cam)
data = normalise_key_object_infos(data, resize_factor, use_grid)

if split == "train" and add_augmented:
data = generate_descriptor_qas(data)

if split == "val" and add_kois:
if (split == "val" or split == "test") and add_kois:
data = generate_yolo_kois(data)
if add_bev:
data = get_calibration(data)
data = generate_bevs(data, front_cam=front_cam)
data = normalise_key_object_infos(data, resize_factor, use_grid)

if use_grid:
Expand All @@ -87,13 +99,17 @@ def __init__(
for scene_id in data.keys():
scene_obj = data[scene_id]["key_frames"]
for key_frame_id in scene_obj.keys():
# NOTE: Only consider FRONT camera, GRID, or BEV images for now (BEV is used only when both add_kois and add_bev are set)
image_paths = scene_obj[key_frame_id]["image_paths"]
if use_grid:
image_path = os.path.join(
drivelm_dir,
image_paths["GRID"],
)
elif add_kois and add_bev:
image_path = os.path.join(
drivelm_dir,
image_paths["BEV"],
)
else:
image_path = os.path.join(
drivelm_dir,
Expand All @@ -111,6 +127,10 @@ def __init__(
else None
)

camera_calibration = None
if split=="val" and add_kois and add_bev:
camera_calibration = scene_obj[key_frame_id]["camera_calibration"]

qas = scene_obj[key_frame_id]["QA"]

qas_perception = qas["perception"]
Expand Down Expand Up @@ -154,6 +174,8 @@ def __init__(
"qa": remove_nones(qa),
"qa_type": qa_types[i],
"id": scene_id + "_" + key_frame_id + "_" + str(i),
"key_frame_id": key_frame_id,
"camera_calibration": camera_calibration,
"key_object_info": key_object_infos
if qa_types[i] != "perception"
else None,
Expand All @@ -175,6 +197,7 @@ def __getitem__(self, idx):
question = qa["qa"]["Q"]
answer = qa["qa"]["A"]
tags = qa["qa"].get("tag", [])
camera_calibration = qa["camera_calibration"]
key_object_info = qa["key_object_info"]
image_path = qa["image_path"]
system_prompt = (
Expand All @@ -183,6 +206,8 @@ def __getitem__(self, idx):
question=question,
resize_factor=self.resize_factor,
use_grid=self.use_grid,
add_bev=self.add_bev,
front_cam=self.front_cam,
use_reasoning=self.use_reasoning,
)
if self.system_prompt_provider
Expand All @@ -198,6 +223,7 @@ def __getitem__(self, idx):
key_object_info=key_object_info,
system_prompt=system_prompt,
ground_truth_answer=answer,
camera_calibration=camera_calibration,
)

if self.use_reasoning and self.split == "train":
Expand Down
Loading