Skip to content

Commit 8bf3572

Browse files
committed
Make HDF5 dataset creation script compute-parallel
1 parent 0fae9ce commit 8bf3572

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

project/datasets/builder/create_hdf5_dataset.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,15 @@
33
import os
44

55
from pathlib import Path
6-
from tqdm import tqdm
6+
from parallel import submit_jobs
77

88
from project.utils.utils import convert_pair_pickle_to_hdf5
99

1010

1111
@click.command()
1212
@click.argument('raw_data_dir', default='../DIPS/final/raw', type=click.Path(exists=True))
13-
def main(raw_data_dir: str):
13+
@click.option('--num_cpus', '-c', default=1)
14+
def main(raw_data_dir: str, num_cpus: int):
1415
raw_data_dir = Path(raw_data_dir)
1516
raw_data_pickle_filepaths = []
1617
for root, dirs, files in os.walk(raw_data_dir):
@@ -19,11 +20,9 @@ def main(raw_data_dir: str):
1920
for file in subfiles:
2021
if file.endswith('.dill'):
2122
raw_data_pickle_filepaths.append(raw_data_dir / dir / file)
22-
for pickle_filepath in tqdm(raw_data_pickle_filepaths):
23-
convert_pair_pickle_to_hdf5(
24-
pickle_filepath=pickle_filepath,
25-
hdf5_filepath=Path(pickle_filepath).with_suffix(".hdf5")
26-
)
23+
inputs = [(pickle_filepath, Path(pickle_filepath).with_suffix(".hdf5")) for pickle_filepath in raw_data_pickle_filepaths]
24+
submit_jobs(convert_pair_pickle_to_hdf5, inputs, num_cpus)
25+
2726
# filepath = Path("project/datasets/DIPS/final/raw/0g/10gs.pdb1_0.dill")
2827
# pickle_example = convert_pair_hdf5_to_pickle(
2928
# hdf5_filepath=Path(filepath).with_suffix(".hdf5")

0 commit comments

Comments (0)