@@ -150,33 +150,7 @@ def _add_compute_resource_launch_template(
150150 instance_profiles ,
151151 is_detailed_monitoring_enabled ,
152152 ):
153- # LT network interfaces
154- compute_lt_nw_interfaces = [
155- ec2 .CfnLaunchTemplate .NetworkInterfaceProperty (
156- device_index = 0 ,
157- network_card_index = 0 ,
158- associate_public_ip_address = queue .networking .assign_public_ip ,
159- interface_type = "efa" if compute_resource .efa and compute_resource .efa .enabled else None ,
160- groups = queue_lt_security_groups ,
161- subnet_id = (
162- queue .networking .subnet_ids [0 ] if isinstance (compute_resource , SlurmComputeResource ) else None
163- ),
164- )
165- ]
166-
167- for network_card in compute_resource .network_cards_list [1 :]:
168- compute_lt_nw_interfaces .append (
169- ec2 .CfnLaunchTemplate .NetworkInterfaceProperty (
170- device_index = 0 if network_card .maximum_network_interfaces () == 1 else 1 ,
171- network_card_index = network_card .network_card_index (),
172- associate_public_ip_address = False ,
173- interface_type = "efa" if compute_resource .efa and compute_resource .efa .enabled else None ,
174- groups = queue_lt_security_groups ,
175- subnet_id = (
176- queue .networking .subnet_ids [0 ] if isinstance (compute_resource , SlurmComputeResource ) else None
177- ),
178- )
179- )
153+ compute_lt_nw_interfaces = add_network_interfaces (queue , compute_resource , queue_lt_security_groups )
180154
181155 conditional_template_properties = {}
182156 if compute_resource .is_ebs_optimized :
@@ -385,3 +359,51 @@ def _add_compute_resource_launch_template(
385359 )
386360
387361 return launch_template
362+
363+
def add_network_interfaces(
    queue,
    compute_resource,
    queue_lt_security_groups,
):
    """
    Generate the launch template network interfaces list for a compute resource.

    The first entry is always the primary interface (network card 0, device 0); one more entry is
    appended for each remaining card in ``compute_resource.network_cards_list``, except for the cards
    that are skipped on p6e-gb200.36xlarge when EFA is disabled.

    :param queue: queue configuration; ``queue.networking`` supplies ``assign_public_ip`` and ``subnet_ids``
    :param compute_resource: compute resource configuration; its ``instance_types``, ``efa`` and
        ``network_cards_list`` attributes are read
    :param queue_lt_security_groups: security groups attached to every generated interface
    :return: list of ``ec2.CfnLaunchTemplate.NetworkInterfaceProperty``
    """
    # Only the first instance type is inspected, and only this exact size gets the special handling.
    is_gb200 = compute_resource.instance_types[0] == "p6e-gb200.36xlarge"
    # Loop-invariant values are computed once up front.
    efa_enabled = bool(compute_resource.efa and compute_resource.efa.enabled)
    # The subnet is set on the interfaces only for Slurm compute resources.
    subnet_id = (
        queue.networking.subnet_ids[0] if isinstance(compute_resource, SlurmComputeResource) else None
    )

    compute_lt_nw_interfaces = [
        ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
            device_index=0,
            network_card_index=0,
            associate_public_ip_address=queue.networking.assign_public_ip,
            # On gb200 the primary interface is never configured as EFA, even when EFA is enabled.
            interface_type="efa" if efa_enabled and not is_gb200 else None,
            groups=queue_lt_security_groups,
            subnet_id=subnet_id,
        )
    ]

    for network_card in compute_resource.network_cards_list[1:]:
        card_index = network_card.network_card_index()
        is_even = card_index % 2 == 0

        # If EFA is disabled and we have a gb200 instance, odd numbered cards are not configured at all.
        if is_gb200 and not efa_enabled and not is_even:
            continue

        if not efa_enabled:
            interface_type = None
        elif is_gb200 and not is_even:
            # With EFA enabled on gb200, even cards are configured as efa and odd cards as efa-only.
            interface_type = "efa-only"
        else:
            interface_type = "efa"

        compute_lt_nw_interfaces.append(
            ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
                device_index=0 if network_card.maximum_network_interfaces() == 1 else 1,
                network_card_index=card_index,
                # Only the primary interface may be assigned a public IP.
                associate_public_ip_address=False,
                interface_type=interface_type,
                groups=queue_lt_security_groups,
                subnet_id=subnet_id,
            )
        )

    return compute_lt_nw_interfaces
0 commit comments