Skip to content

Commit b57009e

Browse files
committed
Make Elasticsearch restarts always rolling
fixes #343
1 parent 65dfa4f commit b57009e

5 files changed

+178
-131
lines changed

roles/elasticsearch/handlers/main.yml

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
---
22
# handlers file for elasticsearch
33
# Restart handler. Instead of restarting the service directly it includes
# restart_elasticsearch.yml once per member of the Elasticsearch group; each
# host only acts on the iteration that carries its own name.
# NOTE(review): this loop/when pairing is presumably what serialises the
# restart across nodes (the "rolling" part) - confirm against the play strategy.
- name: Restart Elasticsearch
  ansible.builtin.include_tasks: restart_elasticsearch.yml
  loop: "{{ groups[elasticstack_elasticsearch_group_name] }}"
  when:
    # hostvars[item].inventory_hostname is the loop item itself
    - item == inventory_hostname
    - elasticsearch_enable | bool
    # Skipped when either "freshstart" result reports a change
    - not elasticsearch_freshstart.changed | bool
    - not elasticsearch_freshstart_security.changed | bool
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
3+
# Query the systemd unit without changing it. The registered result is read
# by the rolling start tasks (elasticsearch_running.status.ActiveState).
- name: Check for running Elasticsearch service
  ansible.builtin.systemd:
    name: elasticsearch
  register: elasticsearch_running

# Stop phase first, then the start phase.
- name: Include rolling stop
  ansible.builtin.include_tasks: tasks/elasticsearch-rolling-stop.yml

- name: Include rolling start
  ansible.builtin.include_tasks: tasks/elasticsearch-rolling-start.yml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# Ansible
2+
#
3+
# Rolling Upgrade of Elasticsearch with security on
4+
# Source from: author: Jeff Steinmetz, @jeffsteinmetz; Bin Li, @holysoros
5+
# Modifications: author: Daniel Neuberger @netways.de
6+
# More modifications: NETWAYS Professional Services GmbH
7+
# Last tested with Ansible 2.9 and later
8+
9+
---
10+
11+
# For now we support upgrade only for clusters with security enabled
12+
# If you positively need support for safely upgrading clusters without security,
13+
# feel free to open an issue at https://github.com/NETWAYS/ansible-collection-elasticstack/issues
14+
15+
# Safe path: start (and enable) the service again. Runs only when the unit
# was recorded as active and the unsafe restart mode is switched off.
- name: Start elasticsearch
  ansible.builtin.service:
    name: elasticsearch
    state: started
    enabled: true
  when:
    - elasticsearch_running.status.ActiveState == "active"
    - not elasticsearch_unsafe_upgrade_restart | bool
23+
24+
# Unsafe path: a single restart of the service. Runs only when the unit was
# recorded as active and the unsafe restart mode is switched on.
- name: Restart elasticsearch (fast, for non-prod)
  ansible.builtin.service:
    name: elasticsearch
    state: restarted
    enabled: true
  when:
    - elasticsearch_running.status.ActiveState == "active"
    - elasticsearch_unsafe_upgrade_restart | bool
32+
33+
# Block until the HTTP port of the node accepts connections again.
- name: Wait for elasticsearch node to come back up if it was stopped
  ansible.builtin.wait_for:
    port: "{{ elasticstack_elasticsearch_http_port }}"
    host: "{{ elasticsearch_api_host }}"
    # seconds to wait before the first connection attempt
    delay: 30
38+
39+
# Poll the _cat/nodes API until this node's name is listed.
#
# Uses the uri module instead of a curl | grep shell pipeline: the password is
# no longer interpolated into a shell command line (where special characters
# would break the command and the secret is visible in the process list), and
# the node name is compared literally instead of being used as a regex.
- name: Confirm the node joins the cluster
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes?h=name"
    method: GET
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
    # same per-attempt time limit the curl call used (-m 2)
    timeout: 2
    return_content: true
  register: result
  # A failed request may carry no `content`; guard it so the task keeps
  # retrying instead of aborting on an undefined attribute.
  until: >-
    result.content is defined and
    elasticsearch_nodename in (result.content.splitlines() | map('trim') | list)
  retries: 200
  delay: 3
  changed_when: false
56+
57+
# Re-enable shard allocation once the node is back: setting the persistent
# cluster.routing.allocation.enable value to null removes the override.
- name: Enable shard allocation for the cluster
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
    method: PUT
    body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
    body_format: json
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
  register: response
  # `acknowledged` is a JSON boolean, not a string. A failed request has no
  # `json` key at all, so guard the lookup; otherwise the conditional errors
  # out and the task aborts instead of retrying.
  until: "response.json is defined and response.json.acknowledged | default(false) | bool"
  retries: 5
  delay: 30
72+
73+
# Poll _cluster/health until the cluster reports yellow or green.
- name: Wait for cluster health to return to yellow or green
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
    method: GET
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
  register: response
  # A failed request has no `json` key; guard the lookup so the task keeps
  # retrying instead of aborting on an undefined attribute.
  until: "response.json is defined and response.json.status | default('') in ['yellow', 'green']"
  retries: 5
  delay: 30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Ansible
2+
#
3+
# Rolling Upgrade of Elasticsearch with security on
4+
# Source from: author: Jeff Steinmetz, @jeffsteinmetz; Bin Li, @holysoros
5+
# Modifications: author: Daniel Neuberger @netways.de
6+
# More modifications: NETWAYS Professional Services GmbH
7+
# Last tested with Ansible 2.9 and later
8+
9+
---
10+
11+
# For now we support upgrade only for clusters with security enabled
12+
# If you positively need support for safely upgrading clusters without security,
13+
# feel free to open an issue at https://github.com/NETWAYS/ansible-collection-elasticstack/issues
14+
# Every API call in this file builds its URL from elasticsearch_http_protocol;
# pin it to https here.
- name: Set connection protocol to https
  ansible.builtin.set_fact:
    elasticsearch_http_protocol: https
17+
18+
# Usually we should not need this step. It's only there to recover from broken upgrade plays
19+
# Without this step the cluster would never recover and the play would always fail
20+
# Recovery step: clear a leftover allocation override before doing anything
# else. Setting the persistent value to null removes the override.
- name: Enable shard allocation for the cluster
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
    method: PUT
    body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
    body_format: json
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
  register: response
  # `acknowledged` is a JSON boolean, not a string. A failed request has no
  # `json` key at all, so guard the lookup; otherwise the conditional errors
  # out and the task aborts instead of retrying.
  until: "response.json is defined and response.json.acknowledged | default(false) | bool"
  retries: 5
  delay: 30
35+
36+
# this step is key!!! Don't restart more nodes
37+
# until all shards have completed recovery
38+
# Gate for the rolling procedure: do not touch this node until the cluster
# reports green, i.e. all shards have finished recovering.
- name: Wait for cluster health to return to green
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
    method: GET
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
  register: response
  # A failed request has no `json` key; guard the lookup so the task keeps
  # retrying instead of aborting on an undefined attribute.
  until: "response.json is defined and response.json.status | default('') == 'green'"
  retries: 50
  delay: 30
49+
50+
# Disabling shard allocation right after enabling it may look redundant: the earlier enable step only exists to recover from a previously failed run.
51+
# Set the persistent cluster.routing.allocation.enable value to "none"
# before the node is taken down.
- name: Disable shard allocation for the cluster
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
    method: PUT
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
    body_format: json
    body: '{ "persistent": { "cluster.routing.allocation.enable": "none" }}'
60+
61+
# Sends a POST to the _flush endpoint. Best effort only: failed_when is
# false, so an error response never fails the play.
- name: Stop non essential indexing to speed up shard recovery
  ansible.builtin.uri:
    url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_flush"
    method: POST
    user: elastic
    password: "{{ elasticstack_password.stdout }}"
    validate_certs: false
  failed_when: false
69+
70+
# Stop the service but keep it enabled. Skipped when the unsafe restart
# mode is switched on.
- name: Shutdown elasticsearch service
  ansible.builtin.service:
    name: elasticsearch
    state: stopped
    enabled: true
  when:
    - not elasticsearch_unsafe_upgrade_restart | bool

roles/elasticsearch/tasks/elasticsearch-rolling-upgrade.yml

+4-127
Original file line numberDiff line numberDiff line change
@@ -71,65 +71,8 @@
7171
- groups[elasticstack_elasticsearch_group_name] | length > 1
7272
block:
7373

74-
# Usually we should not need this step. It's only there to recover from broken upgrade plays
75-
# Without this step the cluster would never recover and the play would always fail
76-
- name: Enable shard allocation for the cluster
77-
ansible.builtin.uri:
78-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
79-
method: PUT
80-
body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
81-
body_format: json
82-
user: elastic
83-
password: "{{ elasticstack_password.stdout }}"
84-
validate_certs: no
85-
register: response
86-
# next line is boolean not string, so no quotes around true
87-
# use python truthiness
88-
until: "response.json.acknowledged == true"
89-
retries: 5
90-
delay: 30
91-
92-
# this step is key!!! Don't restart more nodes
93-
# until all shards have completed recovery
94-
- name: Wait for cluster health to return to green
95-
ansible.builtin.uri:
96-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
97-
method: GET
98-
user: elastic
99-
password: "{{ elasticstack_password.stdout }}"
100-
validate_certs: no
101-
register: response
102-
until: "response.json.status == 'green'"
103-
retries: 50
104-
delay: 30
105-
106-
# Disabling shard allocation right after enabling it seems redundant. Please see above for details.
107-
- name: Disable shard allocation for the cluster
108-
ansible.builtin.uri:
109-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
110-
method: PUT
111-
body: '{ "persistent": { "cluster.routing.allocation.enable": "none" }}'
112-
body_format: json
113-
user: elastic
114-
password: "{{ elasticstack_password.stdout }}"
115-
validate_certs: no
116-
117-
- name: Stop non essential indexing to speed up shard recovery
118-
ansible.builtin.uri:
119-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_flush"
120-
method: POST
121-
user: elastic
122-
password: "{{ elasticstack_password.stdout }}"
123-
validate_certs: no
124-
failed_when: false
125-
126-
- name: Shutdown elasticsearch service
127-
ansible.builtin.service:
128-
name: elasticsearch
129-
enabled: yes
130-
state: stopped
131-
when:
132-
- not elasticsearch_unsafe_upgrade_restart | bool
74+
- name: Include rolling stop
75+
ansible.builtin.include_tasks: elasticsearch-rolling-stop.yml
13376

13477
- name: Update Elasticsearch - rpm with managed repositories
13578
ansible.builtin.package:
@@ -147,72 +90,6 @@
14790
- ansible_os_family == "Debian" or
14891
not elasticstack_full_stack | bool
14992

150-
- name: Start elasticsearch
151-
ansible.builtin.service:
152-
name: elasticsearch
153-
enabled: yes
154-
state: started
155-
when:
156-
- elasticsearch_running.status.ActiveState == "active"
157-
- not elasticsearch_unsafe_upgrade_restart | bool
158-
159-
- name: Restart elasticsearch (fast, for non-prod)
160-
ansible.builtin.service:
161-
name: elasticsearch
162-
enabled: yes
163-
state: restarted
164-
when:
165-
- elasticsearch_running.status.ActiveState == "active"
166-
- elasticsearch_unsafe_upgrade_restart | bool
167-
168-
- name: Wait for elasticsearch node to come back up if it was stopped
169-
ansible.builtin.wait_for:
170-
host: "{{ elasticsearch_api_host }}"
171-
port: "{{ elasticstack_elasticsearch_http_port }}"
172-
delay: 30
173-
174-
- name: Confirm the node joins the cluster # noqa: risky-shell-pipe
175-
ansible.builtin.shell: >
176-
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
177-
curl
178-
-k
179-
-u elastic:{{ elasticstack_password.stdout }}
180-
-s
181-
-m 2
182-
'{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes?h=name'
183-
| grep
184-
-E
185-
'^{{ elasticsearch_nodename }}$'
186-
register: result
187-
until: result.rc == 0
188-
retries: 200
189-
delay: 3
190-
changed_when: false
191-
192-
- name: Enable shard allocation for the cluster
193-
ansible.builtin.uri:
194-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings"
195-
method: PUT
196-
body: '{ "persistent": { "cluster.routing.allocation.enable": null }}'
197-
body_format: json
198-
user: elastic
199-
password: "{{ elasticstack_password.stdout }}"
200-
validate_certs: no
201-
register: response
202-
# next line is boolean not string, so no quotes around true
203-
# use python truthiness
204-
until: "response.json.acknowledged == true"
205-
retries: 5
206-
delay: 30
93+
- name: Include rolling start
94+
ansible.builtin.include_tasks: elasticsearch-rolling-start.yml
20795

208-
- name: Wait for cluster health to return to yellow or green
209-
ansible.builtin.uri:
210-
url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health"
211-
method: GET
212-
user: elastic
213-
password: "{{ elasticstack_password.stdout }}"
214-
validate_certs: no
215-
register: response
216-
until: "response.json.status == 'yellow' or response.json.status == 'green'"
217-
retries: 5
218-
delay: 30

0 commit comments

Comments
 (0)