@@ -63,11 +63,21 @@ def get_jobid():
6363 return os .environ .get ("SLURM_JOB_ID" )
6464
6565 def prepare_launcher (self , config , cluster ):
66+ # which launcher are we using?
67+ launcher = config ["computer" ].get ("launcher" ,None )
68+ # friendly check that you are using a launcher that we support
69+ if launcher not in ["srun" , "mpirun" ]:
70+ print (" The launcher %s is not compatible with ESM-Tools in SLURM " % (launcher ,))
71+ print (" Supported launchers for SLURM are srun and mpirun " )
72+
6673 # MA: not sure how this will play with heterogeneous parallelization
6774 if "multi_srun" in config ["general" ]:
6875 for run_type in list (config ["general" ]["multi_srun" ]):
6976 current_hostfile = self .path + "_" + run_type
70- write_one_hostfile (current_hostfile , config )
77+ if launcher == "srun" :
78+ write_one_hostfile_srun (current_hostfile , config )
79+ elif launcher == "mpirun" :
80+ write_one_hostfile_mpirun (current_hostfile , config )
7181
7282 if config ["computer" ].get (
7383 "heterogeneous_parallelization" , False
@@ -76,7 +86,11 @@ def prepare_launcher(self, config, cluster):
7686 config ["general" ]["batch" ].het_par_launcher_lines (config , cluster )
7787 else :
7888 # Standard/old way of running jobs with slurm
79- self .write_one_hostfile (self .path , config )
89+ if launcher == "srun" :
90+ self .write_one_hostfile_srun (self .path , config )
91+ elif launcher == "mpirun" :
92+ # JK: Need to think about how to handle heterogeneous paralleisation here...
93+ self .write_one_hostfile_mpirun (self .path , config )
8094
8195 hostfile_in_work = (
8296 config ["general" ]["work_dir" ] + "/" + os .path .basename (self .path )
@@ -85,10 +99,11 @@ def prepare_launcher(self, config, cluster):
8599
86100 return config
87101
88- def write_one_hostfile (self , hostfile , config ):
102+ def write_one_hostfile_srun (self , hostfile , config ):
89103 """
90104 Gathers previously prepared requirements
91105 (batch_system.calculate_requirements) and writes them to ``self.path``.
106+ Suitable for srun
92107 """
93108
94109 with open (hostfile , "w" ) as hostfile :
@@ -112,7 +127,50 @@ def write_one_hostfile(self, hostfile, config):
112127 hostfile .write (
113128 str (start_proc ) + "-" + str (end_proc ) + " " + command + "\n "
114129 )
130+
def write_one_hostfile_mpirun(self, hostfile, config):
    """
    Gathers previously prepared requirements
    (batch_system.calculate_requirements) and writes them to ``hostfile``.
    Suitable for the ``mpirun`` launcher.

    Parameters
    ----------
    hostfile : str
        Path of the hostfile to write.
    config : dict
        Experiment configuration. Each model listed in
        ``config["general"]["valid_model_names"]`` is expected to carry
        ``start_proc``/``end_proc`` (inclusive processor range) and either
        an ``execution_command`` or an ``executable`` entry.
    """
    # Collect one " -np <cores> <command> " chunk per model; the chunks
    # are joined with ":" (mpirun's multi-program separator) below, which
    # yields exactly the same string as concatenating "...:" chunks and
    # stripping the trailing colon.
    per_model_options = []

    for model in config["general"]["valid_model_names"]:
        end_proc = config[model].get("end_proc", None)
        start_proc = config[model].get("start_proc", None)

        # A component such as oasis3mct is technically a library and does
        # not need cores of its own, so start_proc/end_proc stay None:
        # skip it.
        if start_proc is None or end_proc is None:
            continue

        # The processor range is inclusive on both ends.
        no_cpus = end_proc - start_proc + 1

        # ``execution_command`` takes precedence over ``executable``.
        if "execution_command" in config[model]:
            command = "./" + config[model]["execution_command"]
        elif "executable" in config[model]:
            command = "./" + config[model]["executable"]
        else:
            print(
                "warning: the executable or execution_command could not be determined for %s"
                % (model,)
            )
            continue

        per_model_options.append(" -np %d %s " % (no_cpus, command))

    # Avoid shadowing the ``hostfile`` path argument with the file handle.
    with open(hostfile, "w") as hostfile_handle:
        hostfile_handle.write(":".join(per_model_options))
173+
116174 @staticmethod
117175 def get_job_state (jobid ):
118176 """
0 commit comments