|
15 | 15 | ansible.builtin.set_fact:
|
16 | 16 | elasticsearch_http_protocol: "https"
|
17 | 17 |
|
18 |
| -# Usually we should not need this step. It's only there to recover from broken upgrade plays |
19 |
| -# Without this step the cluster would never recover and the play would always fail |
20 |
| -- name: Enable shard allocation for the cluster |
21 |
| - ansible.builtin.uri: |
22 |
| - url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/settings" |
23 |
| - method: PUT |
24 |
| - body: '{ "persistent": { "cluster.routing.allocation.enable": null }}' |
25 |
| - body_format: json |
26 |
| - user: elastic |
27 |
| - password: "{{ elasticstack_password.stdout }}" |
28 |
| - validate_certs: no |
29 |
| - register: response |
30 |
| - # next line is boolean not string, so no quotes around true |
31 |
| - # use python truthiness |
32 |
| - until: "response.json.acknowledged == true" |
33 |
| - retries: 5 |
34 |
| - delay: 30 |
35 |
| - |
36 |
| - |
37 |
| -# |
38 |
| -# Start cluster health check |
39 |
| -# |
40 |
| - |
41 |
| -# This step is key: do not restart more nodes until it is safe to do so. That requires either a green cluster status, or a yellow status with 0 initializing and 0 relocating shards. |
42 |
| -# |
43 |
| -# From https://www.elastic.co/guide/en/elastic-stack/8.17/upgrading-elasticsearch.html |
44 |
| -## During a rolling upgrade, primary shards assigned to a node running the new version cannot have their replicas assigned to a node with the old version. The new version might have a different data format that is not understood by the old version. |
45 |
| -## |
46 |
| -## If it is not possible to assign the replica shards to another node (there is only one upgraded node in the cluster), the replica shards remain unassigned and status stays yellow. |
47 |
| -## |
48 |
| -## In this case, you can proceed once there are no initializing or relocating shards (check the init and relo columns). |
49 |
| - |
50 |
| -- name: Check cluster health |
51 |
| - block: |
52 |
| - - name: Wait for cluster health to return to green |
53 |
| - ansible.builtin.uri: |
54 |
| - url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health" |
55 |
| - method: GET |
56 |
| - user: elastic |
57 |
| - password: "{{ elasticstack_password.stdout }}" |
58 |
| - validate_certs: no |
59 |
| - register: response |
60 |
| - until: "response.json.status == 'green'" |
61 |
| - retries: 50 |
62 |
| - delay: 30 |
63 |
| - |
64 |
| - # Timed out while waiting for green cluster |
65 |
| - # Check if we can continue with a yellow cluster |
66 |
| - rescue: |
67 |
| - - name: "Rescue: Check if cluster health is yellow" |
68 |
| - ansible.builtin.uri: |
69 |
| - url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health" |
70 |
| - method: GET |
71 |
| - user: elastic |
72 |
| - password: "{{ elasticstack_password.stdout }}" |
73 |
| - validate_certs: no |
74 |
| - register: response |
75 |
| - failed_when: "response.json.status != 'yellow' or response.json.relocating_shards != 0 or response.json.initializing_shards != 0" |
76 |
| - |
77 |
| - - name: "Rescure: Wait before verifying status" |
78 |
| - ansible.builtin.pause: |
79 |
| - seconds: 10 |
80 |
| - |
81 |
| - - name: "Rescue: Verify we can safely continue with yellow cluster" |
82 |
| - ansible.builtin.uri: |
83 |
| - url: "{{ elasticsearch_http_protocol }}://{{ elasticsearch_api_host }}:{{ elasticstack_elasticsearch_http_port }}/_cluster/health" |
84 |
| - method: GET |
85 |
| - user: elastic |
86 |
| - password: "{{ elasticstack_password.stdout }}" |
87 |
| - validate_certs: no |
88 |
| - register: response |
89 |
| - failed_when: "response.json.status != 'yellow' or response.json.relocating_shards != 0 or response.json.initializing_shards != 0" |
90 |
| - |
91 |
| -# |
92 |
| -# End cluster health check |
93 |
| -# |
| 18 | +# This step is here primarily to recover from a broken or restarted upgrade or rolling restart. |
| 19 | +# TODO: Only run this task for the first host. |
| 20 | +- name: Cluster health check |
| 21 | + ansible.builtin.include_tasks: elasticsearch-wait-for-cluster-health.yml |
94 | 22 |
|
95 | 23 |
|
96 | 24 | # Disabling shard allocation right after enabling it seems redundant. Please see above for details.
|
|
0 commit comments