[Feature] Add MultiHuman pipeline #294

Open. Wants to merge 19 commits into base: main.
4 changes: 2 additions & 2 deletions mmhuman3d/data/data_converters/base_converter.py
@@ -92,9 +92,9 @@ class BaseModeConverter(BaseConverter):
         modes (list): the modes of data for converter
     """
 
-    def convert(self, dataset_path: str, out_path: str, **kwargs):
+    def convert(self, dataset_path: str, out_path: str, *args, **kwargs):
         for mode in self.modes:
-            self.convert_by_mode(dataset_path, out_path, mode, **kwargs)
+            self.convert_by_mode(dataset_path, out_path, mode, *args, **kwargs)
 
     @abstractmethod
     def convert_by_mode(self):
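
The widened signature is what lets the new options reach each per-mode converter: anything extra passed to `convert` is forwarded verbatim to every `convert_by_mode` call. A minimal usage sketch, assuming a registered mode-based converter such as the CrowdposeConverter changed below (paths and modes are illustrative, not from this PR):

from mmhuman3d.data.data_converters.crowdpose import CrowdposeConverter

# Extra keyword arguments flow through convert() unchanged, producing
# one convert_by_mode(dataset_path, out_path, mode, ...) call per mode.
converter = CrowdposeConverter(modes=['train', 'val'])
converter.convert(
    'data/datasets/crowdpose',   # dataset_path (illustrative)
    'data/preprocessed',         # out_path (illustrative)
    multi_human_data=True)       # forwarded to each convert_by_mode call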
40 changes: 36 additions & 4 deletions mmhuman3d/data/data_converters/coco.py
@@ -1,15 +1,21 @@
 import json
 import os
 
+import mmcv
 import numpy as np
 from tqdm import tqdm
 
 from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
 from mmhuman3d.data.data_structures.human_data import HumanData
+from mmhuman3d.data.data_structures.multi_human_data import MultiHumanData
 from .base_converter import BaseConverter
 from .builder import DATA_CONVERTERS
 
 
+def sort_json(json):
+    return int(json['image_id'])
+
+
 @DATA_CONVERTERS.register_module()
 class CocoConverter(BaseConverter):
     """CocoDataset dataset `Microsoft COCO: Common Objects in Context'
@@ -18,7 +24,11 @@ class CocoConverter(BaseConverter):
     <https://arxiv.org/abs/1405.0312>`__ .
     """
 
-    def convert(self, dataset_path: str, out_path: str) -> dict:
+    def convert(self,
+                dataset_path: str,
+                out_path: str,
+                multi_human_data: bool = False,
+                file_client_args: dict = None) -> dict:
         """
         Args:
             dataset_path (str): Path to directory where raw images and
@@ -30,8 +40,12 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
             A dict containing keys image_path, bbox_xywh, keypoints2d,
             keypoints2d_mask stored in HumanData() format
         """
-        # use HumanData to store all data
-        human_data = HumanData()
+        if multi_human_data:
+            # use MultiHumanData to store all data
+            human_data = MultiHumanData()
+        else:
+            # use HumanData to store all data
+            human_data = HumanData()
 
         # structs we need
         image_path_, keypoints2d_, bbox_xywh_ = [], [], []
@@ -40,12 +54,16 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
         json_path = os.path.join(dataset_path, 'annotations',
                                  'person_keypoints_train2014.json')
 
-        json_data = json.load(open(json_path, 'r'))
+        if file_client_args is not None:
+            json_data = mmcv.load(json_path, file_client_args=file_client_args)
+        else:
+            json_data = json.load(open(json_path, 'r'))
 
         imgs = {}
         for img in json_data['images']:
             imgs[img['id']] = img
 
+        json_data['annotations'].sort(key=sort_json)
         for annot in tqdm(json_data['annotations']):
 
             # keypoints processing
@@ -69,6 +87,20 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
             keypoints2d_.append(keypoints2d)
             bbox_xywh_.append(bbox_xywh)
 
+        if multi_human_data:
+            # optional
+            optional = {}
+            optional['frame_range'] = []
+            frame_start = 0
+            frame_end = 0
+            for image_path in sorted(set(image_path_), key=image_path_.index):
+                frame_end = frame_start + \
+                    image_path_.count(image_path)
+                optional['frame_range'].append([frame_start, frame_end])
+                frame_start = frame_end
+            optional['frame_range'] = np.array(optional['frame_range'])
+            human_data['optional'] = optional
+
         # convert keypoints
         bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
         bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
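
The `frame_range` block is the heart of the MultiHumanData bookkeeping. COCO stores one annotation per person, so a single image can contribute several consecutive rows to the flat per-instance arrays; the new `sort_json` key sorts annotations by `image_id` first, which guarantees each image's instances are contiguous. A standalone sketch of the same grouping logic with a worked example (the helper name is ours, not part of the PR):

import numpy as np

def build_frame_range(image_path_):
    # One [start, end) slice per image over the flat per-instance
    # arrays, in order of first appearance.
    frame_range, frame_start = [], 0
    for image_path in sorted(set(image_path_), key=image_path_.index):
        frame_end = frame_start + image_path_.count(image_path)
        frame_range.append([frame_start, frame_end])
        frame_start = frame_end
    return np.array(frame_range)

# Two annotated people in img_a, one in img_b:
print(build_frame_range(['img_a.jpg', 'img_a.jpg', 'img_b.jpg']))
# [[0 2]
#  [2 3]]

The `key=image_path_.index` sort restores first-appearance order regardless of `set` iteration order; note that the `index`/`count` calls inside the loop make this pass quadratic in the number of annotations.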
38 changes: 32 additions & 6 deletions mmhuman3d/data/data_converters/crowdpose.py
@@ -2,11 +2,13 @@
 import os
 from typing import List
 
+import mmcv
 import numpy as np
 from tqdm import tqdm
 
 from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
 from mmhuman3d.data.data_structures.human_data import HumanData
+from mmhuman3d.data.data_structures.multi_human_data import MultiHumanData
 from .base_converter import BaseModeConverter
 from .builder import DATA_CONVERTERS
 
@@ -28,8 +30,12 @@ class CrowdposeConverter(BaseModeConverter):
     def __init__(self, modes: List = []) -> None:
         super(CrowdposeConverter, self).__init__(modes)
 
-    def convert_by_mode(self, dataset_path: str, out_path: str,
-                        mode: str) -> dict:
+    def convert_by_mode(self,
+                        dataset_path: str,
+                        out_path: str,
+                        mode: str,
+                        multi_human_data: bool = False,
+                        file_client_args: dict = None) -> dict:
         """
         Args:
             dataset_path (str): Path to directory where raw images and
@@ -42,8 +48,12 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
             A dict containing keys image_path, bbox_xywh, keypoints2d,
             keypoints2d_mask stored in HumanData() format
         """
-        # use HumanData to store all data
-        human_data = HumanData()
+        if multi_human_data:
+            # use MultiHumanData to store all data
+            human_data = MultiHumanData()
+        else:
+            # use HumanData to store all data
+            human_data = HumanData()
 
         # structs we need
         image_path_, keypoints2d_, bbox_xywh_ = [], [], []
@@ -52,7 +62,10 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
         json_path = os.path.join(dataset_path,
                                  'crowdpose_{}.json'.format(mode))
 
-        json_data = json.load(open(json_path, 'r'))
+        if file_client_args is not None:
+            json_data = mmcv.load(json_path, file_client_args=file_client_args)
+        else:
+            json_data = json.load(open(json_path, 'r'))
 
         imgs = {}
         for img in json_data['images']:
@@ -92,13 +105,26 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
             keypoints2d_.append(keypoints2d)
             bbox_xywh_.append(bbox_xywh)
 
+        if multi_human_data:
+            # optional
+            optional = {}
+            optional['frame_range'] = []
+            frame_start = 0
+            frame_end = 0
+            for image_path in sorted(set(image_path_), key=image_path_.index):
+                frame_end = frame_start + \
+                    image_path_.count(image_path)
+                optional['frame_range'].append([frame_start, frame_end])
+                frame_start = frame_end
+            optional['frame_range'] = np.array(optional['frame_range'])
+            human_data['optional'] = optional
+
         # convert keypoints
         bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
         bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
         keypoints2d_ = np.array(keypoints2d_).reshape((-1, 14, 3))
         keypoints2d_, mask = convert_kps(keypoints2d_, 'crowdpose',
                                          'human_data')
 
         human_data['image_path'] = image_path_
         human_data['keypoints2d_mask'] = mask
         human_data['keypoints2d'] = keypoints2d_
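
The `file_client_args` branch routes annotation reading through `mmcv.load`, so the same converter can pull JSON from a non-local storage backend; with the default `None` it keeps the original `json.load(open(...))` path. A hedged usage sketch: the backend name and bucket-style path are illustrative assumptions, and `file_client_args` support in `mmcv.load` depends on the installed mmcv version:

from mmhuman3d.data.data_converters.crowdpose import CrowdposeConverter

converter = CrowdposeConverter(modes=['train'])

# Local read (default): json.load(open(json_path, 'r'))
converter.convert_by_mode('data/datasets/crowdpose', 'data/preprocessed',
                          mode='train', multi_human_data=True)

# Remote read via an mmcv FileClient backend (illustrative):
converter.convert_by_mode('s3://datasets/crowdpose', 'data/preprocessed',
                          mode='train', multi_human_data=True,
                          file_client_args=dict(backend='petrel'))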
45 changes: 39 additions & 6 deletions mmhuman3d/data/data_converters/eft.py
@@ -2,11 +2,13 @@
 import os
 from typing import List
 
+import mmcv
 import numpy as np
 from tqdm import tqdm
 
 from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
 from mmhuman3d.data.data_structures.human_data import HumanData
+from mmhuman3d.data.data_structures.multi_human_data import MultiHumanData
 from mmhuman3d.utils.transforms import rotmat_to_aa
 from .base_converter import BaseModeConverter
 from .builder import DATA_CONVERTERS
@@ -44,22 +46,35 @@ def center_scale_to_bbox(center: List[float], scale: float) -> List[float]:
         x, y = center[0] - w / 2, center[1] - h / 2
         return [x, y, w, h]
 
-    def convert_by_mode(self, dataset_path: str, out_path: str,
-                        mode: str) -> dict:
+    def convert_by_mode(self,
+                        dataset_path: str,
+                        out_path: str,
+                        mode: str,
+                        multi_human_data: bool = False,
+                        file_client_args: dict = None) -> dict:
         """
         Args:
             dataset_path (str): Path to directory where raw images and
                 annotations are stored.
             out_path (str): Path to directory to save preprocessed npz file
             mode (str): Mode in accepted modes
+            multi_human_data (bool): Stored format. If set to True,
+                stored in MultiHumanData() format. Default: False,
+                stored in HumanData() format.
 
         Returns:
             dict:
                 A dict containing keys image_path, bbox_xywh, keypoints2d,
                 keypoints2d_mask, smpl stored in HumanData() format
         """
-        # use HumanData to store all data
-        human_data = HumanData()
+
+        if multi_human_data:
+            # use MultiHumanData to store all data
+            human_data = MultiHumanData()
+        else:
+            # use HumanData to store all data
+            human_data = HumanData()
 
         image_path_, bbox_xywh_, keypoints2d_ = [], [], []
         smpl = {}
         smpl['betas'] = []
@@ -73,8 +88,12 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
         else:
             raise ValueError('provided dataset is not in eft fittings')
 
-        with open(annot_file, 'r') as f:
-            eft_data = json.load(f)
+        if file_client_args is not None:
+            eft_data = mmcv.load(annot_file, file_client_args=file_client_args)
+        else:
+            with open(annot_file, 'r') as f:
+                eft_data = json.load(f)
+
         eft_data_all = eft_data['data']
 
         for data in tqdm(eft_data_all):
@@ -101,6 +120,20 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
             bbox_xywh_.append(bbox_xywh)
             keypoints2d_.append(gt_keypoint_2d)
 
+        if multi_human_data:
+            # optional
+            optional = {}
+            optional['frame_range'] = []
+            frame_start = 0
+            frame_end = 0
+            for image_path in sorted(set(image_path_), key=image_path_.index):
+                frame_end = frame_start + \
+                    image_path_.count(image_path)
+                optional['frame_range'].append([frame_start, frame_end])
+                frame_start = frame_end
+            optional['frame_range'] = np.array(optional['frame_range'])
+            human_data['optional'] = optional
+
         bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
         bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
         smpl['body_pose'] = np.array(smpl['body_pose']).reshape((-1, 23, 3))
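
On the consumer side, the stored `frame_range` lets a dataset fetch every instance belonging to a given frame with a single slice. A sketch of reading a converted file back, assuming MultiHumanData keeps HumanData's `load`/`dump` interface (the file name below is hypothetical):

from mmhuman3d.data.data_structures.multi_human_data import MultiHumanData

human_data = MultiHumanData()
human_data.load('data/preprocessed/eft_coco_all.npz')  # hypothetical name

# frame_range[i] is the [start, end) slice of frame i's instances
# over the flat per-instance arrays.
start, end = human_data['optional']['frame_range'][0]
bboxes_frame0 = human_data['bbox_xywh'][start:end]
keypoints2d_frame0 = human_data['keypoints2d'][start:end]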
22 changes: 18 additions & 4 deletions mmhuman3d/data/data_converters/h36m.py
@@ -13,6 +13,7 @@
 from mmhuman3d.core.cameras.camera_parameters import CameraParameter
 from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
 from mmhuman3d.data.data_structures.human_data import HumanData
+from mmhuman3d.data.data_structures.multi_human_data import MultiHumanData
 from .base_converter import BaseModeConverter
 from .builder import DATA_CONVERTERS
 
@@ -193,8 +194,11 @@ def __init__(self,
             '60457274': 3,
         }
 
-    def convert_by_mode(self, dataset_path: str, out_path: str,
-                        mode: str) -> dict:
+    def convert_by_mode(self,
+                        dataset_path: str,
+                        out_path: str,
+                        mode: str,
+                        multi_human_data: bool = False) -> dict:
         """
         Args:
             dataset_path (str): Path to directory where raw images and
@@ -208,8 +212,12 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
             keypoints2d_mask, keypoints3d, keypoints3d_mask, cam_param
             stored in HumanData() format
         """
-        # use HumanData to store all data
-        human_data = HumanData()
+        if multi_human_data:
+            # use MultiHumanData to store all data
+            human_data = MultiHumanData()
+        else:
+            # use HumanData to store all data
+            human_data = HumanData()
 
         # pick 17 joints from 32 (repeated) joints
         h36m_idx = [
@@ -374,6 +382,12 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
         smpl['betas'] = np.array(smpl['betas']).reshape((-1, 10))
         human_data['smpl'] = smpl
 
+        if multi_human_data:
+            optional = {}
+            optional['frame_range'] = np.array(
+                [[i, i + 1] for i in range(len(image_path_))])
+            human_data['optional'] = optional
+
         metadata_path = os.path.join(dataset_path, 'metadata.xml')
         if isinstance(metadata_path, str):
             camera = H36mCamera(metadata_path)
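
Human3.6M is a single-person dataset, so this converter skips the grouping pass entirely: frame i owns exactly the one instance at index i, and the comprehension builds the degenerate ranges directly. A tiny sketch of the shape this produces (assuming one annotation per frame, as in H36M):

import numpy as np

num_frames = 4  # illustrative
frame_range = np.array([[i, i + 1] for i in range(num_frames)])
# array([[0, 1],
#        [1, 2],
#        [2, 3],
#        [3, 4]])

With every range of width one, MultiHumanData indexing degenerates to ordinary HumanData indexing, which keeps single-person datasets compatible with the multi-human pipeline.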