Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove redundant restart #279

Merged
merged 27 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5ee74e5
Remove redundant restart
widhalmt Sep 28, 2023
a759794
Only restart Elasticsearch if it was already running
widhalmt Sep 28, 2023
099024d
Check API twice
widhalmt Sep 28, 2023
6d1c7a6
Restrict Elasticsearch restarts
widhalmt Sep 29, 2023
0bd410b
Restart Logstash only when needed
widhalmt Sep 29, 2023
2da2cb9
Fix variable for Elasticsearch fresh start
widhalmt Sep 29, 2023
2140b71
Fix variable names
widhalmt Sep 29, 2023
e75994b
Restart Kibana only when needed
widhalmt Sep 29, 2023
c2a3976
Only restart when all start tasks haven't changed
widhalmt Sep 30, 2023
6f3115c
Give more ressources to test containers
widhalmt Oct 2, 2023
28a1ddb
Typo
widhalmt Oct 2, 2023
02f96e7
Clear cache at end of each role
widhalmt Oct 2, 2023
87a4a18
Workarounds for low ressources on GitHub runners
widhalmt Oct 2, 2023
1929d9f
Typo
widhalmt Oct 2, 2023
510624f
Minimizing services for small CI/CD runners
widhalmt Oct 5, 2023
d0823f6
Add debug tasks
widhalmt Oct 10, 2023
a75e06e
Activate logging in another scenario
widhalmt Oct 10, 2023
74671b2
Don't fail when there's nothing to explain
widhalmt Oct 10, 2023
dd8b1c5
Set watermarks persistent and transient
widhalmt Oct 10, 2023
3ab78f5
Fix curl and add debug
widhalmt Oct 11, 2023
7ff8527
Lint
widhalmt Oct 11, 2023
18753c7
Move watermark change
widhalmt Oct 11, 2023
9831cd9
Remove debug and throttleing
widhalmt Oct 11, 2023
8258929
Undo changes to throtteling.
widhalmt Oct 11, 2023
cf54655
Typo
widhalmt Oct 11, 2023
2f5a802
Merge branch 'main' into fix/redundant_restarts-278
widhalmt Oct 11, 2023
51967e2
Replace unnecessary shell with command
widhalmt Oct 13, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test_full_stack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:

strategy:
fail-fast: false
max-parallel: 2
max-parallel: 2
matrix:
distro:
- rockylinux8
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_roles_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:

strategy:
fail-fast: false
max-parallel: 2
max-parallel: 2
matrix:
distro:
- rockylinux8
Expand Down
1 change: 1 addition & 0 deletions molecule/elasticsearch_default/converge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
elasticsearch_disable_systemcallfilterchecks: true
elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | int}}"
elasticsearch_heap: "1"
elasticstack_no_log: false
tasks:
- name: Include Elastics repos role
ansible.builtin.include_role:
Expand Down
3 changes: 2 additions & 1 deletion molecule/elasticstack_default/converge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
vars:
elasticsearch_jna_workaround: true
elasticsearch_disable_systemcallfilterchecks: true
elasticsearch_monitoring_enabled: false
elasticstack_release: "{{ lookup('env', 'ELASTIC_RELEASE') | int}}"
elasticsearch_heap: "1"
elasticsearch_heap: "2"
elasticstack_full_stack: true
elasticstack_no_log: false
logstash_pipeline_unsafe_shutdown: true
Expand Down
2 changes: 0 additions & 2 deletions molecule/elasticstack_default/molecule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ platforms:
groups:
- beats
- logstash
- kibana
- elasticsearch
image: "geerlingguy/docker-${MOLECULE_DISTRO:-centos7}-ansible:latest"
command: ${MOLECULE_DOCKER_COMMAND:-""}
Expand All @@ -22,7 +21,6 @@ platforms:
- name: "elasticstack${ELASTIC_RELEASE}-cluster2-${MOLECULE_DISTRO}"
groups:
- beats
- logstash
- kibana
- elasticsearch
image: "geerlingguy/docker-${MOLECULE_DISTRO:-centos7}-ansible:latest"
Expand Down
78 changes: 41 additions & 37 deletions molecule/elasticstack_default/verify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,42 +59,46 @@
msg: "Elasticsearch received {{ logstash_count.stdout }} events so far"
when: "'elasticsearch' in group_names"

- name: fetch kibana.yml
ansible.builtin.command: cat /etc/kibana/kibana.yml
register: kibanayml

- name: Show kibana.yml
ansible.builtin.debug:
var: kibanayml.stdout_lines

- name: Check for Kibana port
ansible.builtin.wait_for:
port: 5601
timeout: 120

- name: Connect to Kibana
ansible.builtin.command:
curl
-s
-u elastic:{{ elastic_pass.stdout }}
http://{{ ansible_hostname }}:5601/api/status
register: curl_out
failed_when:
- "'green' not in curl_out.stdout"
- "'Elasticsearch is available' not in curl_out.stdout"

# The following might be nicer but doesn't work
#- name: Connect to Kibana
# ansible.builtin.uri:
# url: http://ansible-role-kibana_full_stack:5601/api/status
# user: elastic
# password: "{{ elastic_password.stdout }}"
# return_content: yes
# register: kibana_status
# #failed_when: "'"title": "Green"' not in kibana_status.content"
# failed_when: "'Green' not in kibana_status.content"

- name: Health check
- name: Run Kibana checks
when: "'kibana' in group_names"
block:

- name: Fetch kibana.yml
ansible.builtin.command: cat /etc/kibana/kibana.yml
register: kibanayml

- name: Show kibana.yml
ansible.builtin.debug:
var: kibanayml.stdout_lines

- name: Check for Kibana port
ansible.builtin.wait_for:
port: 5601
timeout: 120

- name: Connect to Kibana
ansible.builtin.command:
curl
-s
-u elastic:{{ elastic_pass.stdout }}
http://{{ ansible_hostname }}:5601/api/status
register: curl_out
failed_when:
- "'green' not in curl_out.stdout"
- "'Elasticsearch is available' not in curl_out.stdout"

# The following might be nicer but doesn't work
#- name: Connect to Kibana
# ansible.builtin.uri:
# url: http://ansible-role-kibana_full_stack:5601/api/status
# user: elastic
# password: "{{ elastic_password.stdout }}"
# return_content: yes
# register: kibana_status
# #failed_when: "'"title": "Green"' not in kibana_status.content"
# failed_when: "'Green' not in kibana_status.content"

- name: Elasticsearch health check
ansible.builtin.uri:
url: https://localhost:{{ elasticstack_elasticsearch_http_port }}/_cluster/health
method: GET
Expand All @@ -110,7 +114,7 @@
delay: 10
when: groups['elasticsearch'] | length > 1

- name: Node check
- name: Elasticsearch Node check
ansible.builtin.uri:
url: https://localhost:{{ elasticstack_elasticsearch_http_port }}/_cat/nodes
method: GET
Expand Down
8 changes: 8 additions & 0 deletions roles/beats/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,11 @@
- name: Import Metricbeat tasks
ansible.builtin.import_tasks: metricbeat.yml
when: beats_metricbeat | bool

# Free up some space to let Elasticsearch allocate replicas in GitHub Actions
- name: Remove cache # noqa: risky-shell-pipe
ansible.builtin.shell: >
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
rm -rf /var/cache/*
changed_when: false
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"
6 changes: 6 additions & 0 deletions roles/elasticsearch/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ elasticsearch_cert_expiration_buffer: 30
elasticstack_ca_will_expire_soon: false
elasticsearch_cert_will_expire_soon: false

# only used internally
elasticsearch_freshstart:
changed: false
elasticsearch_freshstart_security:
changed: false

# "global" variables for all roles

elasticstack_release: 8
Expand Down
5 changes: 4 additions & 1 deletion roles/elasticsearch/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
name: elasticsearch
state: restarted
daemon_reload: yes
when: elasticsearch_enable | bool
when:
- elasticsearch_enable | bool
- not elasticsearch_freshstart.changed | bool
- not elasticsearch_freshstart_security.changed | bool

- name: Restart kibana if available for elasticsearch certificates
ansible.builtin.include_tasks: handlers/restart_kibana.yml
Expand Down
105 changes: 91 additions & 14 deletions roles/elasticsearch/tasks/elasticsearch-security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -352,25 +352,31 @@
name: elasticsearch
state: started
enabled: yes
register: elasticsearch_freshstart_security

- name: Wait for all instances to start
ansible.builtin.include_tasks: wait_for_instance.yml
loop: "{{ groups['elasticsearch'] }}"

- name: Force all notified handlers to run at this point, not waiting for normal sync points
ansible.builtin.meta: flush_handlers
tags:
- certificates
- renew_ca
- renew_es_cert

- name: Wait for all instances to start
ansible.builtin.include_tasks: wait_for_instance.yml
loop: "{{ groups['elasticsearch'] }}"
tags:
- certificates
- renew_ca
- renew_es_cert
- name: Restart if Elasticsearch was already running
when:
- not elasticsearch_freshstart.changed | bool
- not elasticsearch_freshstart_security.changed | bool
block:
- name: Force all notified handlers to run at this point, not waiting for normal sync points
ansible.builtin.meta: flush_handlers
tags:
- certificates
- renew_ca
- renew_es_cert

- name: Wait for all instances to start
ansible.builtin.include_tasks: wait_for_instance.yml
loop: "{{ groups['elasticsearch'] }}"
tags:
- certificates
- renew_ca
- renew_es_cert

- name: Check for passwords being set
ansible.builtin.stat:
Expand All @@ -383,6 +389,25 @@
elasticsearch_http_protocol: "https"
when: elasticsearch_http_security

- name: Check for API with bootstrap password
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}"
user: elastic
password: "{{ elasticsearch_bootstrap_pw }}"
validate_certs: false
register: elasticsearch_api_status_bootstrap
changed_when: false
no_log: "{{ elasticstack_no_log }}"
when:
- not elasticsearch_passwords_file.stat.exists | bool
- groups['elasticsearch'] | length > 1
until: elasticsearch_api_status_bootstrap.json.cluster_name is defined
retries: 5
delay: 10

# We need this check twice: once to wait for the API to actually become available, and a second time to
# check the actual return code. This should not cause a huge delay.

- name: Check for cluster status with bootstrap password
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}/_cluster/health?pretty"
Expand Down Expand Up @@ -410,6 +435,58 @@
delegate_to: "{{ elasticstack_ca }}"
when: elasticsearch_passwords_file.stat.exists | bool

- name: Check for API availability with elastic password
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}"
user: elastic
password: "{{ elasticstack_password.stdout }}"
validate_certs: false
register: elasticsearch_api_status
changed_when: false
no_log: "{{ elasticstack_no_log }}"
when:
- elasticsearch_passwords_file.stat.exists | bool
- groups['elasticsearch'] | length > 1
until: elasticsearch_api_status.json.cluster_name is defined
retries: 20
delay: 10

- name: Work around low ressources on CI/CD nodes
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"
block:
# Free up some space to let Elasticsearch allocate replicas in GitHub Actions
- name: Remove cache # noqa: risky-shell-pipe
ansible.builtin.shell: >
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
rm -rf /var/cache/*
changed_when: false

- name: Set persistent watermarks to very high values in Docker # noqa: risky-shell-pipe
ansible.builtin.shell: >
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
curl
-k
-X PUT
"{{ elasticsearch_http_protocol }}://elastic:{{ elasticstack_password.stdout }}@localhost:9200/_cluster/settings"
-H 'Content-Type: application/json' -d
'
{
"persistent": {
"cluster.routing.allocation.disk.watermark.low": "97%",
"cluster.routing.allocation.disk.watermark.high": "98%",
"cluster.routing.allocation.disk.watermark.flood_stage": "99%",
"cluster.routing.allocation.disk.watermark.flood_stage.frozen": "99%"
}
}
'
changed_when: false
no_log: "{{ elasticstack_no_log }}"
when:
- elasticstack_password.stdout is defined

# We need this check twice: once to wait for the API to actually become available, and a second time to
# check the actual return code. This should not cause a huge delay.

- name: Check for cluster status with elastic password
ansible.builtin.uri:
url: "{{ elasticsearch_http_protocol }}://localhost:{{ elasticstack_elasticsearch_http_port }}/_cluster/health?pretty"
Expand Down
3 changes: 1 addition & 2 deletions roles/elasticsearch/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@
name: elasticsearch
state: started
enabled: yes
register: elasticsearch_freshstart

- name: Handle cluster setup without security
when: not elasticsearch_security | bool
Expand Down Expand Up @@ -237,8 +238,6 @@
group: root
mode: 0644
backup: "{{ elasticsearch_config_backup }}"
notify:
- Restart Elasticsearch
when: elasticsearch_manage_yaml | bool

- name: Show Info about heap
Expand Down
3 changes: 3 additions & 0 deletions roles/kibana/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ kibana_cert_will_expire_soon: false
kibana_sniff_on_start: false
kibana_sniff_on_connection_fault: false

kibana_freshstart:
changed: false

# "global" variables for all roles
elasticstack_release: 8
elasticstack_full_stack: true
Expand Down
2 changes: 2 additions & 0 deletions roles/kibana/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
ansible.builtin.service:
name: kibana
state: restarted
when:
- not kibana_freshstart.changed | bool
9 changes: 9 additions & 0 deletions roles/kibana/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
state: started
enabled: yes
when: kibana_enable | bool
register: kibana_freshstart

# The following is useful when running tests or extra tasks that need to
# have Kibana running. Escape it on Rocky8, because it times out with Elastic 8
Expand All @@ -90,3 +91,11 @@
ansible.builtin.wait_for:
host: localhost
port: 5601

# Free up some space to let Elasticsearch allocate replicas in GitHub Actions
- name: Remove cache # noqa: risky-shell-pipe
ansible.builtin.shell: >
if test -n "$(ps -p $$ | grep bash)"; then set -o pipefail; fi;
rm -rf /var/cache/*
changed_when: false
when: ansible_virtualization_type == "container" or ansible_virtualization_type == "docker"
5 changes: 5 additions & 0 deletions roles/logstash/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ logstash_pipeline_identifier: true
logstash_pipeline_identifier_field_name: "[netways][pipeline]"
logstash_pipeline_identifier_defaults: false

# Only for internal use

logstash_freshstart:
changed: false

elasticstack_ca_dir: /opt/es-ca
elasticstack_initial_passwords: /usr/share/elasticsearch/initial_passwords
elasticstack_ca_pass: PleaseChangeMe
Expand Down
4 changes: 3 additions & 1 deletion roles/logstash/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
ansible.builtin.service:
name: logstash
state: restarted
when: logstash_enable | bool
when:
- logstash_enable | bool
- not logstash_freshstart.changed | bool

- name: Restart Logstash noauto
ansible.builtin.service:
Expand Down
Loading