3
3
import os
4
4
5
5
from pathlib import Path
6
- from tqdm import tqdm
6
+ from parallel import submit_jobs
7
7
8
8
from project .utils .utils import convert_pair_pickle_to_hdf5
9
9
10
10
11
11
@click .command ()
12
12
@click .argument ('raw_data_dir' , default = '../DIPS/final/raw' , type = click .Path (exists = True ))
13
- def main (raw_data_dir : str ):
13
+ @click .option ('--num_cpus' , '-c' , default = 1 )
14
+ def main (raw_data_dir : str , num_cpus : int ):
14
15
raw_data_dir = Path (raw_data_dir )
15
16
raw_data_pickle_filepaths = []
16
17
for root , dirs , files in os .walk (raw_data_dir ):
@@ -19,11 +20,9 @@ def main(raw_data_dir: str):
19
20
for file in subfiles :
20
21
if file .endswith ('.dill' ):
21
22
raw_data_pickle_filepaths .append (raw_data_dir / dir / file )
22
- for pickle_filepath in tqdm (raw_data_pickle_filepaths ):
23
- convert_pair_pickle_to_hdf5 (
24
- pickle_filepath = pickle_filepath ,
25
- hdf5_filepath = Path (pickle_filepath ).with_suffix (".hdf5" )
26
- )
23
+ inputs = [(pickle_filepath , Path (pickle_filepath ).with_suffix (".hdf5" )) for pickle_filepath in raw_data_pickle_filepaths ]
24
+ submit_jobs (convert_pair_pickle_to_hdf5 , inputs , num_cpus )
25
+
27
26
# filepath = Path("project/datasets/DIPS/final/raw/0g/10gs.pdb1_0.dill")
28
27
# pickle_example = convert_pair_hdf5_to_pickle(
29
28
# hdf5_filepath=Path(filepath).with_suffix(".hdf5")
0 commit comments