Skip to content

Commit d5f7f54

Browse files
authored
Remove redundant restart (#279)
Restarting Elasticsearch takes quite a while and may lead to connection issues as well as sync issues, so keeping restarts to a minimum is important. These changes make sure that, even when the `Restart Elasticsearch` handler is notified, it will only restart if Elasticsearch was already running before. If there was a fresh start (after reconfiguration), we don't need to restart again. The same goes for Logstash and Kibana. Some restarts of these tools happen fairly fast, but others (like after fresh installs or updates) trigger internal jobs that should not be interrupted by another restart. Beats restart very fast and, as far as I know, there's no big downside to restarting them right after the first start, so I didn't include them in the change. Additionally, this PR makes sure some tasks in `verify.yml` of the full stack are only run when the service to be checked is actually running on this node. This helps with spreading services over nodes to save resources. Since GitHub-hosted runners are quite low on resources, we can't run every service on every node in a cluster setup anymore. So this PR makes sure that only Elasticsearch runs everywhere and the others are spread out. Caches get cleared after every role during a Molecule test. This helps with saving resources, too. Elasticsearch still won't sync all shards due to full volumes, so the watermarks for Elasticsearch are set to extremely high values so that the cluster can at least get into sync. fixes #278 fixes #141 fixes #194
1 parent 62bf591 commit d5f7f54

File tree

15 files changed

+183
-61
lines changed

15 files changed

+183
-61
lines changed

molecule/elasticsearch_default/converge.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
elasticsearch_disable_systemcallfilterchecks: true
1313
elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | int}}"
1414
elasticsearch_heap: "1"
15+
elasticstack_no_log: false
1516
tasks:
1617
- name: Include Elastics repos role
1718
ansible.builtin.include_role:

molecule/elasticstack_default/converge.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
vars:
1313
elasticsearch_jna_workaround: true
1414
elasticsearch_disable_systemcallfilterchecks: true
15+
elasticsearch_monitoring_enabled: false
1516
elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | int}}"
16-
elasticsearch_heap: "1"
17+
elasticsearch_heap: "2"
1718
elasticstack_full_stack: true
1819
elasticstack_no_log: false
1920
logstash_pipeline_unsafe_shutdown: true

molecule/elasticstack_default/molecule.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ platforms:
1010
groups:
1111
- beats
1212
- logstash
13-
- kibana
1413
- elasticsearch
1514
image: "geerlingguy/docker-${MOLECULE_DISTRO:-centos7}-ansible:latest"
1615
command: ${MOLECULE_DOCKER_COMMAND:-""}
@@ -22,7 +21,6 @@ platforms:
2221
- name: "elasticstack${ELASTIC_RELEASE}-cluster2-${MOLECULE_DISTRO}"
2322
groups:
2423
- beats
25-
- logstash
2624
- kibana
2725
- elasticsearch
2826
image: "geerlingguy/docker-${MOLECULE_DISTRO:-centos7}-ansible:latest"

molecule/elasticstack_default/verify.yml

Lines changed: 41 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -59,42 +59,46 @@
5959
msg: "Elasticsearch received {{ logstash_count.stdout }} events so far"
6060
when: "'elasticsearch' in group_names"
6161

62-
- name: fetch kibana.yml
63-
ansible.builtin.command: cat /etc/kibana/kibana.yml
64-
register: kibanayml
65-
66-
- name: Show kibana.yml
67-
ansible.builtin.debug:
68-
var: kibanayml.stdout_lines
69-
70-
- name: Check for Kibana port
71-
ansible.builtin.wait_for:
72-
port: 5601
73-
timeout: 120
74-
75-
- name: Connect to Kibana
76-
ansible.builtin.command:
77-
curl
78-
-s
79-
-u elastic:{{ elastic_pass.stdout }}
80-
http://{{ ansible_hostname }}:5601/api/status
81-
register: curl_out
82-
failed_when:
83-
- "'green' not in curl_out.stdout"
84-
- "'Elasticsearch is available' not in curl_out.stdout"
85-
86-
# The following might be nicer but doesn't work
87-
#- name: Connect to Kibana
88-
# ansible.builtin.uri:
89-
# url: http://ansible-role-kibana_full_stack:5601/api/status
90-
# user: elastic
91-
# password: "{{ elastic_password.stdout }}"
92-
# return_content: yes
93-
# register: kibana_status
94-
# #failed_when: "'"title": "Green"' not in kibana_status.content"
95-
# failed_when: "'Green' not in kibana_status.content"
96-
97-
- name: Health check
62+
- name: Run Kibana checks
63+
when: "'kibana' in group_names"
64+
block:
65+
66+
- name: Fetch kibana.yml
67+
ansible.builtin.command: cat /etc/kibana/kibana.yml
68+
register: kibanayml
69+
70+
- name: Show kibana.yml
71+
ansible.builtin.debug:
72+
var: kibanayml.stdout_lines
73+
74+
- name: Check for Kibana port
75+
ansible.builtin.wait_for:
76+
port: 5601
77+
timeout: 120
78+
79+
- name: Connect to Kibana
80+
ansible.builtin.command:
81+
curl
82+
-s
83+
-u elastic:{{ elastic_pass.stdout }}
84+
http://{{ ansible_hostname }}:5601/api/status
85+
register: curl_out
86+
failed_when:
87+
- "'green' not in curl_out.stdout"
88+
- "'Elasticsearch is available' not in curl_out.stdout"
89+
90+
# The following might be nicer but doesn't work
91+
#- name: Connect to Kibana
92+
# ansible.builtin.uri:
93+
# url: http://ansible-role-kibana_full_stack:5601/api/status
94+
# user: elastic
95+
# password: "{{ elastic_password.stdout }}"
96+
# return_content: yes
97+
# register: kibana_status
98+
# #failed_when: "'"title": "Green"' not in kibana_status.content"
99+
# failed_when: "'Green' not in kibana_status.content"
100+
101+
- name: Elasticsearch health check
98102
ansible.builtin.uri:
99103
url: https://localhost:{{ elasticstack_elasticsearch_http_port }}/_cluster/health
100104
method: GET
@@ -110,7 +114,7 @@
110114
delay: 10
111115
when: groups['elasticsearch'] | length > 1
112116

113-
- name: Node check
117+
- name: Elasticsearch Node check
114118
ansible.builtin.uri:
115119
url: https://localhost:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes
116120
method: GET

roles/beats/tasks/main.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,3 +82,10 @@
8282
- name: Import Metricbeat tasks
8383
ansible.builtin.import_tasks: metricbeat.yml
8484
when: beats_metricbeat | bool
85+
86+
# Free up some space to let Elasticsearch allocate replicas in GitHub Actions
87+
- name: Remove cache
88+
ansible.builtin.command: >
89+
rm -rf /var/cache/*
90+
changed_when: false
91+
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"

roles/elasticsearch/defaults/main.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ elasticsearch_cert_expiration_buffer: 30
4848
elasticstack_ca_will_expire_soon: false
4949
elasticsearch_cert_will_expire_soon: false
5050

51+
# only used internally
52+
elasticsearch_freshstart:
53+
changed: false
54+
elasticsearch_freshstart_security:
55+
changed: false
56+
5157
# "global" variables for all roles
5258

5359
elasticstack_release: 8

roles/elasticsearch/handlers/main.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
name: elasticsearch
66
state: restarted
77
daemon_reload: yes
8-
when: elasticsearch_enable | bool
8+
when:
9+
- elasticsearch_enable | bool
10+
- not elasticsearch_freshstart.changed | bool
11+
- not elasticsearch_freshstart_security.changed | bool
912

1013
- name: Restart kibana if available for elasticsearch certificates
1114
ansible.builtin.include_tasks: handlers/restart_kibana.yml

roles/elasticsearch/tasks/elasticsearch-security.yml

Lines changed: 90 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -352,25 +352,31 @@
352352
name: elasticsearch
353353
state: started
354354
enabled: yes
355+
register: elasticsearch_freshstart_security
355356

356357
- name: Wait for all instances to start
357358
ansible.builtin.include_tasks: wait_for_instance.yml
358359
loop: "{{ groups['elasticsearch'] }}"
359360

360-
- name: Force all notified handlers to run at this point, not waiting for normal sync points
361-
ansible.builtin.meta: flush_handlers
362-
tags:
363-
- certificates
364-
- renew_ca
365-
- renew_es_cert
366-
367-
- name: Wait for all instances to start
368-
ansible.builtin.include_tasks: wait_for_instance.yml
369-
loop: "{{ groups['elasticsearch'] }}"
370-
tags:
371-
- certificates
372-
- renew_ca
373-
- renew_es_cert
361+
- name: Restart if Elasticsearch was already running
362+
when:
363+
- not elasticsearch_freshstart.changed | bool
364+
- not elasticsearch_freshstart_security.changed | bool
365+
block:
366+
- name: Force all notified handlers to run at this point, not waiting for normal sync points
367+
ansible.builtin.meta: flush_handlers
368+
tags:
369+
- certificates
370+
- renew_ca
371+
- renew_es_cert
372+
373+
- name: Wait for all instances to start
374+
ansible.builtin.include_tasks: wait_for_instance.yml
375+
loop: "{{ groups['elasticsearch'] }}"
376+
tags:
377+
- certificates
378+
- renew_ca
379+
- renew_es_cert
374380

375381
- name: Check for passwords being set
376382
ansible.builtin.stat:
@@ -383,6 +389,25 @@
383389
elasticsearch_http_protocol: "https"
384390
when: elasticsearch_http_security
385391

392+
- name: Check for API with bootstrap password
393+
ansible.builtin.uri:
394+
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}"
395+
user: elastic
396+
password: "{{ elasticsearch_bootstrap_pw }}"
397+
validate_certs: false
398+
register: elasticsearch_api_status_bootstrap
399+
changed_when: false
400+
no_log: "{{ elasticstack_no_log }}"
401+
when:
402+
- not elasticsearch_passwords_file.stat.exists | bool
403+
- groups['elasticsearch'] | length > 1
404+
until: elasticsearch_api_status_bootstrap.json.cluster_name is defined
405+
retries: 5
406+
delay: 10
407+
408+
# We need this check twice: once to wait for the API to actually be available, and a second time to
409+
# check the actual return code. Should not cause a huge delay.
410+
386411
- name: Check for cluster status with bootstrap password
387412
ansible.builtin.uri:
388413
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}/_cluster/health?pretty"
@@ -410,6 +435,57 @@
410435
delegate_to: "{{ elasticstack_ca }}"
411436
when: elasticsearch_passwords_file.stat.exists | bool
412437

438+
- name: Check for API availability with elastic password
439+
ansible.builtin.uri:
440+
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}"
441+
user: elastic
442+
password: "{{ elasticstack_password.stdout }}"
443+
validate_certs: false
444+
register: elasticsearch_api_status
445+
changed_when: false
446+
no_log: "{{ elasticstack_no_log }}"
447+
when:
448+
- elasticsearch_passwords_file.stat.exists | bool
449+
- groups['elasticsearch'] | length > 1
450+
until: elasticsearch_api_status.json.cluster_name is defined
451+
retries: 20
452+
delay: 10
453+
454+
- name: Work around low ressources on CI/CD nodes
455+
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"
456+
block:
457+
# Free up some space to let Elasticsearch allocate replicas in GitHub Actions
458+
- name: Remove cache
459+
ansible.builtin.command: >
460+
rm -rf /var/cache/*
461+
changed_when: false
462+
463+
- name: Set persistent watermarks to very high values in Docker # noqa: risky-shell-pipe
464+
ansible.builtin.shell: >
465+
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
466+
curl
467+
-k
468+
-X PUT
469+
"{{ elasticsearch_http_protocol }}://elastic:{{ elasticstack_password.stdout }}@localhost:9200/_cluster/settings"
470+
-H 'Content-Type: application/json' -d
471+
'
472+
{
473+
"persistent": {
474+
"cluster.routing.allocation.disk.watermark.low": "97%",
475+
"cluster.routing.allocation.disk.watermark.high": "98%",
476+
"cluster.routing.allocation.disk.watermark.flood_stage": "99%",
477+
"cluster.routing.allocation.disk.watermark.flood_stage.frozen": "99%"
478+
}
479+
}
480+
'
481+
changed_when: false
482+
no_log: "{{ elasticstack_no_log }}"
483+
when:
484+
- elasticstack_password.stdout is defined
485+
486+
# We need this check twice: once to wait for the API to actually be available, and a second time to
487+
# check the actual return code. Should not cause a huge delay.
488+
413489
- name: Check for cluster status with elastic password
414490
ansible.builtin.uri:
415491
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}/_cluster/health?pretty"

roles/elasticsearch/tasks/main.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,9 +178,8 @@
178178
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"
179179

180180
# Free up some space to let Elasticsearch allocate replicas in GitHub Actions
181-
- name: Remove cache # noqa: risky-shell-pipe
182-
ansible.builtin.shell: >
183-
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
181+
- name: Remove cache
182+
ansible.builtin.command: >
184183
rm -rf /var/cache/*
185184
changed_when: false
186185
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"
@@ -200,6 +199,7 @@
200199
name: elasticsearch
201200
state: started
202201
enabled: yes
202+
register: elasticsearch_freshstart
203203

204204
- name: Handle cluster setup without security
205205
when: not elasticsearch_security | bool
@@ -237,8 +237,6 @@
237237
group: root
238238
mode: 0644
239239
backup: "{{ elasticsearch_config_backup }}"
240-
notify:
241-
- Restart Elasticsearch
242240
when: elasticsearch_manage_yaml | bool
243241

244242
- name: Show Info about heap

roles/kibana/defaults/main.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ kibana_cert_will_expire_soon: false
1818
kibana_sniff_on_start: false
1919
kibana_sniff_on_connection_fault: false
2020

21+
kibana_freshstart:
22+
changed: false
23+
2124
# "global" variables for all roles
2225
elasticstack_release: 8
2326
elasticstack_full_stack: true

0 commit comments

Comments
 (0)