4
4
##
5
5
## Started 22 Feb 2024
6
6
## Izaskun Mallona
7
-
8
- import sys
7
+ import hashlib
8
+ import subprocess
9
9
import os
10
10
from typing import List
11
11
12
+ from git import Repo
12
13
from snakemake .script import Snakemake
13
14
14
15
@@ -22,7 +23,61 @@ def mock_execution(inputs: List[str], output: str, snakemake: Snakemake):
22
23
print (' params are' , snakemake .params )
23
24
24
25
25
- def dump_parameters_to_file (output_dir : str , parameters : str ):
26
def execution(module_dir: str, module_name: str, output_dir: str, dataset: str,
              inputs_map: dict[str, str], parameters: List[str]):
    """Run a module's ``run.sh`` script and return its standard output.

    The command line is built as
    ``run.sh <output_dir> <dataset> [--<key> <value> ...] [parameters ...]``.

    Args:
        module_dir: Directory that must contain the ``run.sh`` script.
        module_name: Name used only in the "script does not exist" error message.
        output_dir: First positional argument passed to ``run.sh``.
        dataset: Second positional argument passed to ``run.sh``.
        inputs_map: Mapping of input keys to values; each pair is passed as
            ``--<key> <value>``. May be empty or ``None``.
        parameters: Extra arguments appended verbatim. May be empty or ``None``.

    Returns:
        The captured standard output of the script, as text.

    Raises:
        RuntimeError: If ``run.sh`` is missing from ``module_dir`` or exits
            with a non-zero status.
    """
    run_sh = os.path.join(module_dir, 'run.sh')
    if not os.path.exists(run_sh):
        raise RuntimeError(f'{module_name} run.sh script does not exist')

    # Positional arguments first, then keyed inputs, then free-form parameters.
    command = [run_sh, output_dir, dataset]
    for key, value in (inputs_map or {}).items():
        command.extend([f"--{key}", value])
    command.extend(parameters or [])

    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # Output is captured, so nothing reaches the console; surface the exit
        # code and the script's own diagnostics in the error instead.
        message = f'Error executing {run_sh} (exit code {e.returncode})'
        details = (e.stderr or e.stdout or '').strip()
        if details:
            message = f'{message}: {details}'
        raise RuntimeError(message) from e

    return result.stdout
52
+
53
+
54
+ # Create a unique folder name based on the repository URL and commit hash
55
def generate_unique_repo_folder_name(repo_url, commit_hash):
    """Return the MD5 hex digest of ``"<repo_url>@<commit_hash>"``.

    The digest serves as a folder name that is stable for a given
    repository URL and commit hash pair.
    """
    identity = "{}@{}".format(repo_url, commit_hash).encode()
    return hashlib.md5(identity).hexdigest()
60
+
61
+
62
def clone_module(output_dir: str, repository_url: str, commit_hash: str):
    """Clone a module repository at a given commit, reusing an existing clone.

    The clone lives in ``output_dir`` under a folder name derived from the
    repository URL and commit hash. If that folder already exists it is
    opened as-is rather than cloned again.

    Args:
        output_dir: Parent directory that holds the module clones.
        repository_url: URL passed to ``Repo.clone_from``.
        commit_hash: Commit to check out; a full or abbreviated SHA.

    Returns:
        Path of the module's working directory.

    Raises:
        RuntimeError: If the repository's HEAD commit does not match
            ``commit_hash``.
    """
    module_name = generate_unique_repo_folder_name(repository_url, commit_hash)
    module_dir = os.path.join(output_dir, module_name)

    if not os.path.exists(module_dir):
        repo = Repo.clone_from(repository_url, module_dir)
        repo.git.checkout(commit_hash)
    else:
        repo = Repo(module_dir)

    # Compare by prefix so that both abbreviated and full-length hashes are
    # accepted; an empty hash would match anything, so reject it explicitly.
    head_sha = repo.head.commit.hexsha
    if not commit_hash or not head_sha.startswith(commit_hash.lower()):
        raise RuntimeError(f'WARNING: {commit_hash} does not match {head_sha[:7]}')

    return module_dir
76
+
77
+
78
+ def dump_parameters_to_file (output_dir : str , parameters : List [str ]):
79
+ os .makedirs (output_dir , exist_ok = True )
80
+
26
81
if parameters is not None :
27
82
params_file = os .path .join (output_dir , 'parameters.txt' )
28
83
with open (params_file , 'w' ) as params_file :
@@ -35,17 +90,31 @@ def dump_parameters_to_file(output_dir: str, parameters: str):
35
90
36
91
# `snakemake` is looked up as a global here; only this lookup is guarded, so
# that a NameError raised later (while cloning or executing the module) is not
# misreported as "not running under Snakemake".
try:
    snakemake: Snakemake = snakemake
except NameError:
    raise RuntimeError("This script must be run from within a Snakemake workflow.") from None

params = dict(snakemake.params)
parameters = params['parameters']
repository_url = params['repository_url']
commit_hash = params['commit_hash']
inputs_map = params.get('inputs_map')

# Create parameters file for outputs, next to the first declared output
output_dir = os.path.dirname(snakemake.output[0])
dump_parameters_to_file(output_dir, parameters)

# Clone github repository
repositories_dir = os.path.join(".snakemake", "repos")
module_dir = clone_module(repositories_dir, repository_url, commit_hash)

# Execute module code
module_name = snakemake.rule

# TODO Fix logic of inferring output dirname
# With a single output the common path is the file itself; a non-empty
# extension is taken as the sign of a file, and its parent is used instead.
output_dir = os.path.commonpath(snakemake.output)
if os.path.splitext(output_dir)[1] != '':
    output_dir = os.path.dirname(output_dir)

dataset = snakemake.wildcards.dataset
execution(module_dir, module_name=module_name, output_dir=output_dir, dataset=dataset,
          inputs_map=inputs_map, parameters=parameters)
0 commit comments