Skip to content

Commit 69b9ede

Browse files
authored
Merge pull request galaxyproject#19 from galaxyproject/community-updates-bugfixes
Numerous features and bugfixes from #1
2 parents 0d347ce + 20b685d commit 69b9ede

File tree

8 files changed

+162
-28
lines changed

8 files changed

+162
-28
lines changed

README.md

Lines changed: 93 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,129 @@
11
Slurm
22
=====
33

4-
Install and configure Slurm
4+
Install and configure a Slurm cluster on RHEL/CentOS or Debian/Ubuntu servers
55

66
Role Variables
77
--------------
88

9-
All variables are optional. If nothing is set, the role will install the Slurm client programs, munge, and create a `slurm.conf` with a single `localhost` node and `debug` partition. See the [defaults](defaults/main.yml) and [example playbook](#example-playbook) for examples.
9+
All variables are optional. If nothing is set, the role will install the Slurm client programs, munge, and
10+
create a `slurm.conf` with a single `localhost` node and `debug` partition.
11+
See the [defaults](defaults/main.yml) and [example playbooks](#example-playbooks) for examples.
1012

1113
For the various roles a Slurm node can play, you can either set group names, or add values to a list, `slurm_roles`.
1214

1315
- group slurmservers or `slurm_roles: ['controller']`
1416
- group slurmexechosts or `slurm_roles: ['exec']`
1517
- group slurmdbdservers or `slurm_roles: ['dbd']`
1618

17-
General config options for slurm.conf go in `slurm_config`, a hash. Keys are slurm config option names.
19+
General config options for slurm.conf go in `slurm_config`, a hash. Keys are Slurm config option names.
1820

1921
Partitions and nodes go in `slurm_partitions` and `slurm_nodes`, lists of hashes. The only required key in the hash is
2022
`name`, which becomes the `PartitionName` or `NodeName` for that line. All other keys/values are placed onto the line
2123
of that partition or node.
2224

23-
Set `slurm_upgrade` true to upgrade.
25+
Options for the additional configuration files [acct_gather.conf](https://slurm.schedmd.com/acct_gather.conf.html),
26+
[cgroup.conf](https://slurm.schedmd.com/cgroup.conf.html) and [gres.conf](https://slurm.schedmd.com/gres.conf.html)
27+
may be specified in the `slurm_acct_gather_config`, `slurm_cgroup_config` (both of them hashes) and
28+
`slurm_gres_config` (list of hashes) respectively.
2429

25-
You can use `slurm_user` (a hash) and `slurm_create_user` (a bool) to pre-create a Slurm user (so that uids match). See
30+
Set `slurm_upgrade` to true to upgrade the installed Slurm packages.
31+
32+
You can use `slurm_user` (a hash) and `slurm_create_user` (a bool) to pre-create a Slurm user so that uids match.
33+
34+
Note that this role requires root access, so enable `become` either globally in your playbook / on the command line or
35+
just for the role, as [shown below](#example-playbooks).
2636

2737
Dependencies
2838
------------
2939

3040
None.
3141

32-
Example Playbook
33-
----------------
42+
Example Playbooks
43+
-----------------
44+
45+
Minimal setup, all services on one node:
3446

3547
```yaml
3648
- name: Slurm all in One
3749
hosts: all
3850
vars:
3951
slurm_roles: ['controller', 'exec', 'dbd']
4052
roles:
41-
- galaxyproject.slurm
53+
- role: galaxyproject.slurm
54+
become: True
55+
```
56+
57+
More extensive example:
58+
59+
```yaml
60+
- name: Slurm execution hosts
61+
hosts: all
62+
roles:
63+
- role: galaxyproject.slurm
64+
become: True
65+
vars:
66+
slurm_cgroup_config:
67+
CgroupMountpoint: "/sys/fs/cgroup"
68+
CgroupAutomount: yes
69+
ConstrainCores: yes
70+
TaskAffinity: no
71+
ConstrainRAMSpace: yes
72+
ConstrainSwapSpace: no
73+
ConstrainDevices: no
74+
AllowedRamSpace: 100
75+
AllowedSwapSpace: 0
76+
MaxRAMPercent: 100
77+
MaxSwapPercent: 100
78+
MinRAMSpace: 30
79+
slurm_config:
80+
AccountingStorageType: "accounting_storage/none"
81+
ClusterName: cluster
82+
GresTypes: gpu
83+
JobAcctGatherType: "jobacct_gather/none"
84+
MpiDefault: none
85+
ProctrackType: "proctrack/cgroup"
86+
ReturnToService: 1
87+
SchedulerType: "sched/backfill"
88+
SelectType: "select/cons_res"
89+
SelectTypeParameters: "CR_Core"
90+
SlurmctldHost: "slurmctl"
91+
SlurmctldLogFile: "/var/log/slurm/slurmctld.log"
92+
SlurmctldPidFile: "/var/run/slurmctld.pid"
93+
SlurmdLogFile: "/var/log/slurm/slurmd.log"
94+
SlurmdPidFile: "/var/run/slurmd.pid"
95+
SlurmdSpoolDir: "/var/spool/slurmd"
96+
StateSaveLocation: "/var/spool/slurmctld"
97+
SwitchType: "switch/none"
98+
TaskPlugin: "task/affinity,task/cgroup"
99+
TaskPluginParam: Sched
100+
slurm_create_user: yes
101+
slurm_gres_config:
102+
- File: /dev/nvidia[0-3]
103+
Name: gpu
104+
NodeName: gpu[01-10]
105+
Type: tesla
106+
slurm_munge_key: "../../../munge.key"
107+
slurm_nodes:
108+
- name: "gpu[01-10]"
109+
CoresPerSocket: 18
110+
Gres: "gpu:tesla:4"
111+
Sockets: 2
112+
ThreadsPerCore: 2
113+
slurm_partitions:
114+
- name: gpu
115+
Default: YES
116+
MaxTime: UNLIMITED
117+
Nodes: "gpu[01-10]"
118+
slurm_roles: ['exec']
119+
slurm_user:
120+
comment: "Slurm Workload Manager"
121+
gid: 888
122+
group: slurm
123+
home: "/var/lib/slurm"
124+
name: slurm
125+
shell: "/usr/sbin/nologin"
126+
uid: 888
42127
```
43128
44129
License

handlers/main.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
---
2+
- name: restart munge
3+
service:
4+
name: munge
5+
state: restarted
26

37
- name: reload slurmd
48
service:

tasks/_inc_extra_configs.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
3+
- name: Install extra execution host configs
4+
template:
5+
src: "{{ item.template }}"
6+
dest: "{{ slurm_config_dir }}/{{ item.name }}"
7+
backup: yes
8+
with_items:
9+
- name: acct_gather.conf
10+
config: slurm_acct_gather_config
11+
template: generic.conf.j2
12+
- name: cgroup.conf
13+
config: slurm_cgroup_config
14+
template: generic.conf.j2
15+
- name: gres.conf
16+
config: slurm_gres_config
17+
template: gres.conf.j2
18+
loop_control:
19+
label: "{{ item.name }}"
20+
when: item.config in vars
21+
notify:
22+
- reload slurmctld
23+
- reload slurmd

tasks/munge.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
group: munge
1717
mode: 0400
1818
when: slurm_munge_key is defined
19+
notify:
20+
- restart munge
1921

2022
- name: Ensure Munge is enabled and running
2123
service:

tasks/slurmctld.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,10 @@
2424
mode: 0755
2525
state: directory
2626
when: slurm_create_dirs and __slurm_config_merged.SlurmctldLogFile != omit
27+
28+
- name: Include config dir creation tasks
29+
include_tasks: _inc_create_config_dir.yml
30+
when: slurm_create_dirs
31+
32+
- name: Include extra config creation tasks
33+
include_tasks: _inc_extra_configs.yml

tasks/slurmd.yml

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,5 @@
2929
include_tasks: _inc_create_config_dir.yml
3030
when: slurm_create_dirs
3131

32-
- name: Install extra execution host configs
33-
template:
34-
src: generic.conf.j2
35-
dest: "{{ slurm_config_dir }}/{{ item.name }}"
36-
backup: yes
37-
with_items:
38-
- name: cgroup.conf
39-
config: slurm_cgroup_config
40-
when: item.config in vars
32+
- name: Include extra config creation tasks
33+
include_tasks: _inc_extra_configs.yml

tasks/slurmdbd.yml

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,30 @@
55
name: "{{ __slurm_packages.slurmdbd }}"
66
state: "{{ 'latest' if slurm_upgrade else 'present' }}"
77

8-
- name: Install slurmdbd.conf
9-
template:
10-
src: generic.conf.j2
11-
dest: "{{ slurm_config_dir }}/slurmdbd.conf"
12-
owner: "{{ __slurm_user_name }}"
13-
group: root
14-
mode: 0400
15-
notify:
16-
- reload slurmdbd
17-
188
- name: Create slurm log directory
199
file:
2010
path: "{{ __slurmdbd_config_merged.LogFile | dirname }}"
2111
owner: "{{ __slurm_user_name }}"
2212
group: "{{ __slurm_group_name }}"
2313
mode: 0755
2414
state: directory
25-
when: slurm_create_dirs and __slurmdbd_config_merged.LogFile
15+
when: slurm_create_dirs and __slurmdbd_config_merged.LogFile != omit
16+
17+
- name: Include config dir creation tasks
18+
include_tasks: _inc_create_config_dir.yml
19+
when: slurm_create_dirs
20+
21+
- name: Install slurmdbd.conf
22+
template:
23+
src: generic.conf.j2
24+
dest: "{{ slurm_config_dir }}/{{ item.name }}"
25+
owner: "{{ __slurm_user_name }}"
26+
group: root
27+
mode: 0600
28+
with_items:
29+
- name: slurmdbd.conf
30+
config: __slurmdbd_config_merged
31+
loop_control:
32+
label: "{{ item.name }}"
33+
notify:
34+
- reload slurmdbd

templates/gres.conf.j2

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
##
2+
## This file is maintained by Ansible - ALL MODIFICATIONS WILL BE REVERTED
3+
##
4+
5+
{% set conf = lookup('vars', item.config) %}
6+
{% for gres in conf %}
7+
{% if gres['NodeName'] is not none %}
8+
NodeName={{ gres['NodeName'] }}{% for key in gres | sort %}{% if key != 'NodeName' %} {{ key }}={{ gres[key] }}{% endif %}{% endfor %}
9+
10+
{% endif %}
11+
{% endfor %}

0 commit comments

Comments
 (0)