From c6ad495176c952799a1a8c6948cf8d3457879230 Mon Sep 17 00:00:00 2001 From: Simon Alibert Date: Wed, 20 Nov 2024 11:57:27 +0100 Subject: [PATCH] Enhance dataset cards --- lerobot/common/datasets/card_template.md | 27 ++ lerobot/common/datasets/lerobot_dataset.py | 6 +- lerobot/common/datasets/utils.py | 64 ++--- .../v2/batch_convert_dataset_v1_to_v2.py | 264 +++++++++++++----- .../datasets/v2/convert_dataset_v1_to_v2.py | 10 +- lerobot/scripts/push_dataset_to_hub.py | 4 +- 6 files changed, 265 insertions(+), 110 deletions(-) create mode 100644 lerobot/common/datasets/card_template.md diff --git a/lerobot/common/datasets/card_template.md b/lerobot/common/datasets/card_template.md new file mode 100644 index 000000000..4d9417498 --- /dev/null +++ b/lerobot/common/datasets/card_template.md @@ -0,0 +1,27 @@ +--- +# For reference on dataset card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/datasetcard.md?plain=1 +# Doc / guide: https://huggingface.co/docs/hub/datasets-cards +{{ card_data }} +--- + +This dataset was created using [LeRobot](https://github.com/huggingface/lerobot). + +## Dataset Description + +{{ dataset_description | default("", true) }} + +- **Homepage:** {{ url | default("[More Information Needed]", true)}} +- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}} +- **License:** {{ license | default("[More Information Needed]", true)}} + +## Dataset Structure + +{{ dataset_structure | default("[More Information Needed]", true)}} + +## Citation [optional] + +**BibTeX:** + +```bibtex +{{ citation_bibtex | default("[More Information Needed]", true)}} +``` diff --git a/lerobot/common/datasets/lerobot_dataset.py b/lerobot/common/datasets/lerobot_dataset.py index c5ae03544..8dbae8f51 100644 --- a/lerobot/common/datasets/lerobot_dataset.py +++ b/lerobot/common/datasets/lerobot_dataset.py @@ -467,10 +467,10 @@ def __init__( def push_to_hub( self, tags: list | None = None, - text: str | None = None, license: str | None = "apache-2.0", push_videos: bool = True, private: bool = False, + **card_kwargs, ) -> None: if not self.consolidated: raise RuntimeError( @@ -495,7 +495,9 @@ def push_to_hub( repo_type="dataset", ignore_patterns=ignore_patterns, ) - card = create_lerobot_dataset_card(tags=tags, text=text, info=self.meta.info, license=license) + card = create_lerobot_dataset_card( + tags=tags, dataset_info=self.meta.info, license=license, **card_kwargs + ) card.push_to_hub(repo_id=self.repo_id, repo_type="dataset") create_branch(repo_id=self.repo_id, branch=CODEBASE_VERSION, repo_type="dataset") diff --git a/lerobot/common/datasets/utils.py b/lerobot/common/datasets/utils.py index 0ad3dfaec..036a49427 100644 --- a/lerobot/common/datasets/utils.py +++ b/lerobot/common/datasets/utils.py @@ -27,7 +27,7 @@ import pyarrow.compute as pc import torch from datasets.table import embed_table_storage -from huggingface_hub import DatasetCard, HfApi +from huggingface_hub import DatasetCard, DatasetCardData, HfApi from PIL import Image as PILImage from torchvision import transforms @@ -50,6 +50,8 @@ --- This dataset was created using [LeRobot](https://github.com/huggingface/lerobot). +## {} + """ DEFAULT_FEATURES = { @@ -468,41 +470,33 @@ def create_branch(repo_id, *, branch: str, repo_type: str | None = None) -> None def create_lerobot_dataset_card( tags: list | None = None, - text: str | None = None, - info: dict | None = None, - license: str | None = None, - url: str | None = None, - citation: str | None = None, - arxiv: str | None = None, + dataset_info: dict | None = None, + **kwargs, ) -> DatasetCard: """ - If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses. + Keyword arguments will be used to replace values in ./lerobot/common/datasets/card_template.md. + Note: If specified, license must be one of https://huggingface.co/docs/hub/repositories-licenses. """ - card = DatasetCard(DATASET_CARD_TEMPLATE) - card.data.configs = [ - { - "config_name": "default", - "data_files": "data/*/*.parquet", - } - ] - card.data.task_categories = ["robotics"] - card.data.license = license - card.data.tags = ["LeRobot"] - if license: - card.data.license = license + card_tags = ["LeRobot"] if tags: - card.data.tags += tags - if url: - card.text += f"## Homepage:\n{url}\n" - if text: - card.text += f"{text}\n" - if info: - card.text += "## Info\n" - card.text += "[meta/info.json](meta/info.json)\n" - card.text += f"```json\n{json.dumps(info, indent=4)}\n```" - if citation: - card.text += "## Citation\n" - card.text += f"```\n{citation}\n```\n" - if arxiv: - card.data.arxiv = arxiv - return card + card_tags += tags + if dataset_info: + dataset_structure = "[meta/info.json](meta/info.json):\n" + dataset_structure += f"```json\n{json.dumps(dataset_info, indent=4)}\n```\n" + kwargs = {**kwargs, "dataset_structure": dataset_structure} + card_data = DatasetCardData( + license=kwargs.get("license"), + tags=card_tags, + task_categories=["robotics"], + configs=[ + { + "config_name": "default", + "data_files": "data/*/*.parquet", + } + ], + ) + return DatasetCard.from_template( + card_data=card_data, + template_path="./lerobot/common/datasets/card_template.md", + **kwargs, + ) diff --git a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py index 37c9583de..c1df71524 100644 --- a/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/batch_convert_dataset_v1_to_v2.py @@ -14,84 +14,220 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +Note: Since the original Aloha datasets don't use shadow motors, you need to comment those out in +lerobot/configs/robot/aloha.yaml before running this script. +""" + import traceback from pathlib import Path +from textwrap import dedent -from lerobot import available_datasets from lerobot.common.datasets.v2.convert_dataset_v1_to_v2 import convert_dataset, parse_robot_config LOCAL_DIR = Path("data/") -ALOHA_SINGLE_TASKS_REAL = { - "aloha_mobile_cabinet": "Open the top cabinet, store the pot inside it then close the cabinet.", - "aloha_mobile_chair": "Push the chairs in front of the desk to place them against it.", - "aloha_mobile_elevator": "Take the elevator to the 1st floor.", - "aloha_mobile_shrimp": "Sauté the raw shrimp on both sides, then serve it in the bowl.", - "aloha_mobile_wash_pan": "Pick up the pan, rinse it in the sink and then place it in the drying rack.", - "aloha_mobile_wipe_wine": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.", - "aloha_static_battery": "Place the battery into the slot of the remote controller.", - "aloha_static_candy": "Pick up the candy and unwrap it.", - "aloha_static_coffee": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.", - "aloha_static_coffee_new": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.", - "aloha_static_cups_open": "Pick up the plastic cup and open its lid.", - "aloha_static_fork_pick_up": "Pick up the fork and place it on the plate.", - "aloha_static_pingpong_test": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.", - "aloha_static_pro_pencil": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.", - "aloha_static_screw_driver": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.", - "aloha_static_tape": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.", - "aloha_static_thread_velcro": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.", - "aloha_static_towel": "Pick up a piece of paper towel and place it on the spilled liquid.", - "aloha_static_vinh_cup": "Pick up the platic cup with the right arm, then pop its lid open with the left arm.", - "aloha_static_vinh_cup_left": "Pick up the platic cup with the left arm, then pop its lid open with the right arm.", - "aloha_static_ziploc_slide": "Slide open the ziploc bag.", -} + ALOHA_CONFIG = Path("lerobot/configs/robot/aloha.yaml") +ALOHA_MOBILE_INFO = { + "robot_config": parse_robot_config(ALOHA_CONFIG), + "license": "mit", + "url": "https://mobile-aloha.github.io/", + "paper": "https://arxiv.org/abs/2401.02117", + "citation_bibtex": dedent(""" + @inproceedings{fu2024mobile, + author = {Fu, Zipeng and Zhao, Tony Z. and Finn, Chelsea}, + title = {Mobile ALOHA: Learning Bimanual Mobile Manipulation with Low-Cost Whole-Body Teleoperation}, + booktitle = {arXiv}, + year = {2024}, + }""").lstrip(), +} +ALOHA_STATIC_INFO = { + "robot_config": parse_robot_config(ALOHA_CONFIG), + "license": "mit", + "url": "https://tonyzhaozh.github.io/aloha/", + "paper": "https://arxiv.org/abs/2304.13705", + "citation_bibtex": dedent(""" + @article{Zhao2023LearningFB, + title={Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware}, + author={Tony Zhao and Vikash Kumar and Sergey Levine and Chelsea Finn}, + journal={RSS}, + year={2023}, + volume={abs/2304.13705}, + url={https://arxiv.org/abs/2304.13705} + }""").lstrip(), +} +PUSHT_INFO = { + "license": "mit", + "url": "https://diffusion-policy.cs.columbia.edu/", + "paper": "https://arxiv.org/abs/2303.04137v5", + "citation_bibtex": dedent(""" + @article{chi2024diffusionpolicy, + author = {Cheng Chi and Zhenjia Xu and Siyuan Feng and Eric Cousineau and Yilun Du and Benjamin Burchfiel and Russ Tedrake and Shuran Song}, + title ={Diffusion Policy: Visuomotor Policy Learning via Action Diffusion}, + journal = {The International Journal of Robotics Research}, + year = {2024}, + }""").lstrip(), +} +XARM_INFO = { + "license": "mit", + "url": "https://www.nicklashansen.com/td-mpc/", + "paper": "https://arxiv.org/abs/2203.04955", + "citation_bibtex": dedent(""" + @inproceedings{Hansen2022tdmpc, + title={Temporal Difference Learning for Model Predictive Control}, + author={Nicklas Hansen and Xiaolong Wang and Hao Su}, + booktitle={ICML}, + year={2022} + } + """), +} +UNITREEH_INFO = { + "license": "apache-2.0", +} + + +DATASETS = { + "aloha_mobile_cabinet": { + "single_task": "Open the top cabinet, store the pot inside it then close the cabinet.", + **ALOHA_MOBILE_INFO, + }, + "aloha_mobile_chair": { + "single_task": "Push the chairs in front of the desk to place them against it.", + **ALOHA_MOBILE_INFO, + }, + "aloha_mobile_elevator": { + "single_task": "Take the elevator to the 1st floor.", + **ALOHA_MOBILE_INFO, + }, + "aloha_mobile_shrimp": { + "single_task": "Sauté the raw shrimp on both sides, then serve it in the bowl.", + **ALOHA_MOBILE_INFO, + }, + "aloha_mobile_wash_pan": { + "single_task": "Pick up the pan, rinse it in the sink and then place it in the drying rack.", + **ALOHA_MOBILE_INFO, + }, + "aloha_mobile_wipe_wine": { + "single_task": "Pick up the wet cloth on the faucet and use it to clean the spilled wine on the table and underneath the glass.", + **ALOHA_MOBILE_INFO, + }, + "aloha_static_battery": { + "single_task": "Place the battery into the slot of the remote controller.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_candy": {"single_task": "Pick up the candy and unwrap it.", **ALOHA_STATIC_INFO}, + "aloha_static_coffee": { + "single_task": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray, then push the 'Hot Water' and 'Travel Mug' buttons.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_coffee_new": { + "single_task": "Place the coffee capsule inside the capsule container, then place the cup onto the center of the cup tray.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_cups_open": { + "single_task": "Pick up the plastic cup and open its lid.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_fork_pick_up": { + "single_task": "Pick up the fork and place it on the plate.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_pingpong_test": { + "single_task": "Transfer one of the two balls in the right glass into the left glass, then transfer it back to the right glass.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_pro_pencil": { + "single_task": "Pick up the pencil with the right arm, hand it over to the left arm then place it back onto the table.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_screw_driver": { + "single_task": "Pick up the screwdriver with the right arm, hand it over to the left arm then place it into the cup.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_tape": { + "single_task": "Cut a small piece of tape from the tape dispenser then place it on the cardboard box's edge.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_thread_velcro": { + "single_task": "Pick up the velcro cable tie with the left arm, then insert the end of the velcro tie into the other end's loop with the right arm.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_towel": { + "single_task": "Pick up a piece of paper towel and place it on the spilled liquid.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_vinh_cup": { + "single_task": "Pick up the platic cup with the right arm, then pop its lid open with the left arm.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_vinh_cup_left": { + "single_task": "Pick up the platic cup with the left arm, then pop its lid open with the right arm.", + **ALOHA_STATIC_INFO, + }, + "aloha_static_ziploc_slide": {"single_task": "Slide open the ziploc bag.", **ALOHA_STATIC_INFO}, + "aloha_sim_insertion_scripted": {"single_task": "Insert the peg into the socket.", **ALOHA_STATIC_INFO}, + "aloha_sim_insertion_scripted_image": { + "single_task": "Insert the peg into the socket.", + **ALOHA_STATIC_INFO, + }, + "aloha_sim_insertion_human": {"single_task": "Insert the peg into the socket.", **ALOHA_STATIC_INFO}, + "aloha_sim_insertion_human_image": { + "single_task": "Insert the peg into the socket.", + **ALOHA_STATIC_INFO, + }, + "aloha_sim_transfer_cube_scripted": { + "single_task": "Pick up the cube with the right arm and transfer it to the left arm.", + **ALOHA_STATIC_INFO, + }, + "aloha_sim_transfer_cube_scripted_image": { + "single_task": "Pick up the cube with the right arm and transfer it to the left arm.", + **ALOHA_STATIC_INFO, + }, + "aloha_sim_transfer_cube_human": { + "single_task": "Pick up the cube with the right arm and transfer it to the left arm.", + **ALOHA_STATIC_INFO, + }, + "aloha_sim_transfer_cube_human_image": { + "single_task": "Pick up the cube with the right arm and transfer it to the left arm.", + **ALOHA_STATIC_INFO, + }, + "pusht": {"single_task": "Push the T-shaped block onto the T-shaped target.", **PUSHT_INFO}, + "pusht_image": {"single_task": "Push the T-shaped block onto the T-shaped target.", **PUSHT_INFO}, + "unitreeh1_fold_clothes": {"single_task": "Fold the sweatshirt.", **UNITREEH_INFO}, + "unitreeh1_rearrange_objects": {"single_task": "Put the object into the bin.", **UNITREEH_INFO}, + "unitreeh1_two_robot_greeting": { + "single_task": "Greet the other robot with a high five.", + **UNITREEH_INFO, + }, + "unitreeh1_warehouse": { + "single_task": "Grab the spray paint on the shelf and place it in the bin on top of the robot dog.", + **UNITREEH_INFO, + }, + "xarm_lift_medium": {"single_task": "Pick up the cube and lift it.", **XARM_INFO}, + "xarm_lift_medium_image": {"single_task": "Pick up the cube and lift it.", **XARM_INFO}, + "xarm_lift_medium_replay": {"single_task": "Pick up the cube and lift it.", **XARM_INFO}, + "xarm_lift_medium_replay_image": {"single_task": "Pick up the cube and lift it.", **XARM_INFO}, + "xarm_push_medium": {"single_task": "Push the cube onto the target.", **XARM_INFO}, + "xarm_push_medium_image": {"single_task": "Push the cube onto the target.", **XARM_INFO}, + "xarm_push_medium_replay": {"single_task": "Push the cube onto the target.", **XARM_INFO}, + "xarm_push_medium_replay_image": {"single_task": "Push the cube onto the target.", **XARM_INFO}, + "umi_cup_in_the_wild": { + "single_task": "Put the cup on the plate.", + "license": "apache-2.0", + }, +} def batch_convert(): status = {} logfile = LOCAL_DIR / "conversion_log.txt" - for num, repo_id in enumerate(available_datasets): - print(f"\nConverting {repo_id} ({num}/{len(available_datasets)})") + # assert set(DATASETS) == set(id_.split("/")[1] for id_ in available_datasets) + for num, (name, kwargs) in enumerate(DATASETS.items()): + repo_id = f"lerobot/{name}" + print(f"\nConverting {repo_id} ({num}/{len(DATASETS)})") print("---------------------------------------------------------") - name = repo_id.split("/")[1] - single_task, tasks_col, robot_config = None, None, None - - if "aloha" in name: - robot_config = parse_robot_config(ALOHA_CONFIG) - if "sim_insertion" in name: - single_task = "Insert the peg into the socket." - elif "sim_transfer" in name: - single_task = "Pick up the cube with the right arm and transfer it to the left arm." - else: - single_task = ALOHA_SINGLE_TASKS_REAL[name] - elif "unitreeh1" in name: - if "fold_clothes" in name: - single_task = "Fold the sweatshirt." - elif "rearrange_objects" in name or "rearrange_objects" in name: - single_task = "Put the object into the bin." - elif "two_robot_greeting" in name: - single_task = "Greet the other robot with a high five." - elif "warehouse" in name: - single_task = ( - "Grab the spray paint on the shelf and place it in the bin on top of the robot dog." - ) - elif name != "columbia_cairlab_pusht_real" and "pusht" in name: - single_task = "Push the T-shaped block onto the T-shaped target." - elif "xarm_lift" in name or "xarm_push" in name: - single_task = "Pick up the cube and lift it." - elif name == "umi_cup_in_the_wild": - single_task = "Put the cup on the plate." - else: - tasks_col = "language_instruction" - try: - convert_dataset( - repo_id=repo_id, - local_dir=LOCAL_DIR, - single_task=single_task, - tasks_col=tasks_col, - robot_config=robot_config, - ) + convert_dataset(repo_id, LOCAL_DIR, **kwargs) status = f"{repo_id}: success." with open(logfile, "a") as file: file.write(status + "\n") diff --git a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py index de8ff4c4e..dafcded4e 100644 --- a/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py +++ b/lerobot/common/datasets/v2/convert_dataset_v1_to_v2.py @@ -176,6 +176,7 @@ def parse_robot_config(config_path: Path, config_overrides: list[str] | None = N "robot_type": robot_cfg["robot_type"], "names": { "observation.state": state_names, + "observation.effort": state_names, "action": action_names, }, } @@ -436,11 +437,8 @@ def convert_dataset( tasks_path: Path | None = None, tasks_col: Path | None = None, robot_config: dict | None = None, - license: str | None = None, - url: str | None = None, - arxiv: str | None = None, - citation: str | None = None, test_branch: str | None = None, + **card_kwargs, ): v1 = get_hub_safe_version(repo_id, V16) v1x_dir = local_dir / V16 / repo_id @@ -566,9 +564,7 @@ def convert_dataset( } write_json(metadata_v2_0, v20_dir / INFO_PATH) convert_stats_to_json(v1x_dir, v20_dir) - card = create_lerobot_dataset_card( - tags=repo_tags, info=metadata_v2_0, license=license, url=url, citation=citation, arxiv=arxiv - ) + card = create_lerobot_dataset_card(tags=repo_tags, dataset_info=metadata_v2_0, **card_kwargs) with contextlib.suppress(EntryNotFoundError): hub_api.delete_folder(repo_id=repo_id, path_in_repo="data", repo_type="dataset", revision=branch) diff --git a/lerobot/scripts/push_dataset_to_hub.py b/lerobot/scripts/push_dataset_to_hub.py index 755424570..2bb641a4d 100644 --- a/lerobot/scripts/push_dataset_to_hub.py +++ b/lerobot/scripts/push_dataset_to_hub.py @@ -120,11 +120,11 @@ def push_dataset_card_to_hub( repo_id: str, revision: str | None, tags: list | None = None, - text: str | None = None, license: str = "apache-2.0", + **card_kwargs, ): """Creates and pushes a LeRobotDataset Card with appropriate tags to easily find it on the hub.""" - card = create_lerobot_dataset_card(tags=tags, text=text, license=license) + card = create_lerobot_dataset_card(tags=tags, license=license, **card_kwargs) card.push_to_hub(repo_id=repo_id, repo_type="dataset", revision=revision)