diff --git a/README.md b/README.md index 0ea057b..a4bf4e6 100644 --- a/README.md +++ b/README.md @@ -59,9 +59,10 @@ unique set of homogenous nodes: `free --mebi` total * `openhpc_ram_multiplier`. * `ram_multiplier`: Optional. An override for the top-level definition `openhpc_ram_multiplier`. Has no effect if `ram_mb` is set. - * `gres`: Optional. List of dicts defining [generic resources](https://slurm.schedmd.com/gres.html). Each dict must define: + * `gres_autodetect`: Optional. The [auto detection mechanism](https://slurm.schedmd.com/gres.conf.html#OPT_AutoDetect) to use for the generic resources. Note: you must still define the `gres` dictionary (see below) but you only need to define the `conf` key. + * `gres`: Optional. List of dicts defining [generic resources](https://slurm.schedmd.com/gres.html). Each dict should define: - `conf`: A string with the [resource specification](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1) but requiring the format `<name>:<type>:<number>`, e.g. `gpu:A100:2`. Note the `type` is an arbitrary string. - - `file`: A string with the [File](https://slurm.schedmd.com/gres.conf.html#OPT_File) (path to device(s)) for this resource, e.g. `/dev/nvidia[0-1]` for the above example. + - `file`: Omit if `gres_autodetect` is set. A string with the [File](https://slurm.schedmd.com/gres.conf.html#OPT_File) (path to device(s)) for this resource, e.g. `/dev/nvidia[0-1]` for the above example. Note [GresTypes](https://slurm.schedmd.com/slurm.conf.html#OPT_GresTypes) must be set in `openhpc_config` if this is used. * `features`: Optional. List of [Features](https://slurm.schedmd.com/slurm.conf.html#OPT_Features) strings. * `node_params`: Optional. 
Mapping of additional parameters and values for diff --git a/templates/gres.conf.j2 b/templates/gres.conf.j2 index bc23ed5..78208bf 100644 --- a/templates/gres.conf.j2 +++ b/templates/gres.conf.j2 @@ -1,11 +1,19 @@ AutoDetect=off {% for nodegroup in openhpc_nodegroups %} -{% for gres in nodegroup.gres | default([]) %} -{% set gres_name, gres_type, _ = gres.conf.split(':') %} -{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} -{% set inventory_group_hosts = groups.get(inventory_group_name, []) %} +{% set gres_list = nodegroup.gres | default([]) %} +{% set gres_autodetect = nodegroup.gres_autodetect | default('off') %} +{% set inventory_group_name = openhpc_cluster_name ~ '_' ~ nodegroup.name %} +{% set inventory_group_hosts = groups.get(inventory_group_name, []) %} +{% if gres_autodetect != 'off' %}{# already defaulted to 'off' above; emit only per-node AutoDetect lines #} {% for hostlist in (inventory_group_hosts | hostlist_expression) %} -NodeName={{ hostlist }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file }} +NodeName={{ hostlist }} AutoDetect={{ gres_autodetect }} {% endfor %}{# hostlists #} -{% endfor %}{# gres #} +{% else %}{# no autodetect: each gres entry must supply an explicit file #} +{% for gres in gres_list %} +{% set gres_name, gres_type, _ = gres.conf.split(':') %} +{% for hostlist in (inventory_group_hosts | hostlist_expression) %} +NodeName={{ hostlist }} Name={{ gres_name }} Type={{ gres_type }} File={{ gres.file | mandatory('The gres configuration dictionary: ' ~ gres ~ ' is missing the file key, but gres_autodetect is set to off. The error occurred on node group: ' ~ nodegroup.name ~ '. Please add the file key or set gres_autodetect.') }} +{% endfor %}{# hostlists #} +{% endfor %}{# gres #} +{% endif %}{# autodetect #} {% endfor %}{# nodegroup #}