-
Notifications
You must be signed in to change notification settings - Fork 3.1k
/
Copy pathcreate-indices.sh
executable file
·203 lines (174 loc) · 8.71 KB
/
create-indices.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/bin/bash
# Connection and naming setup shared by the rest of this script.
#
# Environment read here:
#   DATAHUB_ANALYTICS_ENABLED   default: true
#   USE_AWS_ELASTICSEARCH       default: false
#   ELASTICSEARCH_INSECURE      default: false; "true" adds --insecure to every curl call
#   DUE_SHARDS / DUE_REPLICAS   default: 1
#   ELASTICSEARCH_USE_SSL       "true" selects https, anything else selects http
#   ELASTICSEARCH_HOST / ELASTICSEARCH_PORT
#   ELASTICSEARCH_AUTH_HEADER   used verbatim when non-empty
#   ELASTICSEARCH_USERNAME / ELASTICSEARCH_PASSWORD
#                               used to build a Basic auth header when no header is given
#   INDEX_PREFIX                optional; "<INDEX_PREFIX>_" becomes PREFIX
#
# Variables produced: ELASTICSEARCH_PROTOCOL, ELASTICSEARCH_URL, ELASTICSEARCH_AUTH_HEADER,
# CURL_ARGS (array), PREFIX, INDEX_DEFINITIONS_ROOT.
set -e

: "${DATAHUB_ANALYTICS_ENABLED:=true}"
: "${USE_AWS_ELASTICSEARCH:=false}"
: "${ELASTICSEARCH_INSECURE:=false}"
: "${DUE_SHARDS:=1}"
: "${DUE_REPLICAS:=1}"

# protocol: http or https?
if [[ "$ELASTICSEARCH_USE_SSL" == true ]]; then
  ELASTICSEARCH_PROTOCOL=https
else
  ELASTICSEARCH_PROTOCOL=http
fi
echo -e "going to use protocol: $ELASTICSEARCH_PROTOCOL"

# Elasticsearch URL to be suffixed with a resource address
ELASTICSEARCH_URL="$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT"

# set auth header if none is given
if [[ -z "$ELASTICSEARCH_AUTH_HEADER" ]]; then
  if [[ -n "$ELASTICSEARCH_USERNAME" ]]; then
    # no auth header given, but username is defined -> use it to create the auth header.
    # printf '%s' is used instead of `echo -ne` so that backslash sequences inside the
    # credentials (e.g. a password containing "\t") are encoded literally, not expanded.
    AUTH_TOKEN=$(printf '%s' "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0)
    ELASTICSEARCH_AUTH_HEADER="Authorization:Basic $AUTH_TOKEN"
    echo -e "going to use elastic headers based on username and password"
  else
    # no auth header or username given -> use default auth header
    ELASTICSEARCH_AUTH_HEADER="Accept: */*"
    echo -e "going to use default elastic headers"
  fi
fi

# will be using this for all curl communication with Elasticsearch:
CURL_ARGS=(
  --silent
  --header "$ELASTICSEARCH_AUTH_HEADER"
)
# ... also optionally use --insecure
if [[ "$ELASTICSEARCH_INSECURE" == true ]]; then
  CURL_ARGS+=(--insecure)
fi

# index prefix used throughout the script
if [[ -z "$INDEX_PREFIX" ]]; then
  PREFIX=''
  echo -e "not using any prefix"
else
  PREFIX="${INDEX_PREFIX}_"
  echo -e "going to use prefix: '$PREFIX'"
fi

# path where index definitions are stored
INDEX_DEFINITIONS_ROOT=/index/usage-event
# Ensure a resource exists in Elasticsearch, creating it from a definition file when missing.
#
# Arguments:
#   $1 - resource address, appended to $ELASTICSEARCH_URL
#   $2 - name of the definition file under $INDEX_DEFINITIONS_ROOT
# Globals read:
#   CURL_ARGS, ELASTICSEARCH_URL, INDEX_DEFINITIONS_ROOT, PREFIX, DUE_SHARDS, DUE_REPLICAS,
#   USE_AWS_ELASTICSEARCH
# Globals written:
#   RESOURCE_STATUS - HTTP code of the existence check. Deliberately NOT local: the AWS setup
#                     path inspects it after this function returns.
# Behaviour by response code:
#   200   -> nothing to do
#   404   -> render the definition and PUT it; the function returns curl's exit status
#   403   -> exit 1
#   other -> exit 1 (with an AWS hint for 401/405)
# Note: PREFIX, DUE_SHARDS and DUE_REPLICAS are interpolated into sed expressions unescaped,
# so they must not contain `/`, `&` or backslashes.
function create_if_not_exists {
  local resource_address="$1"
  local definition_name="$2"
  local payload_path="/tmp/$definition_name"

  # query ES to see if the resource already exists
  RESOURCE_STATUS=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}\n" "$ELASTICSEARCH_URL/$resource_address")
  echo -e "\n>>> GET $resource_address response code is $RESOURCE_STATUS"

  case "$RESOURCE_STATUS" in
    200)
      # resource already exists -> nothing to do
      echo -e ">>> $resource_address already exists ✓"
      ;;
    404)
      # resource doesn't exist -> need to create it
      echo -e ">>> creating $resource_address because it doesn't exist ..."
      # Render the definition: replace the PREFIX, DUE_SHARDS and DUE_REPLICAS placeholders.
      # `tee` (without -a) truncates the payload file, so a file left behind by an earlier
      # run is never concatenated with the new rendering and sent as invalid JSON.
      sed -e "s/PREFIX/$PREFIX/g" \
          -e "s/DUE_SHARDS/$DUE_SHARDS/g" \
          -e "s/DUE_REPLICAS/$DUE_REPLICAS/g" \
          "$INDEX_DEFINITIONS_ROOT/$definition_name" \
        | tee "$payload_path"
      curl "${CURL_ARGS[@]}" -XPUT "$ELASTICSEARCH_URL/$resource_address" -H 'Content-Type: application/json' --data "@$payload_path"
      ;;
    403)
      # probably authorization fail
      echo -e ">>> forbidden access to $resource_address ! -> exiting"
      exit 1
      ;;
    *)
      # when `USE_AWS_ELASTICSEARCH` was forgotten to be set to `true` when running against AWS ES OSS,
      # this script will use wrong paths (e.g. `_ilm/policy/` instead of AWS-compatible `_opendistro/_ism/policies/`)
      # and the ES endpoint will return `401 Unauthorized` or `405 Method Not Allowed`
      # -> use this as a chance to point out that the wrong config might be in use
      if [[ "$RESOURCE_STATUS" == 401 || "$RESOURCE_STATUS" == 405 ]]; then
        if [[ "$USE_AWS_ELASTICSEARCH" == false && "$ELASTICSEARCH_URL" == *"amazonaws"* ]]; then
          echo "... looks like AWS OpenSearch is used; please set USE_AWS_ELASTICSEARCH env value to true"
        fi
      fi
      echo -e ">>> failed to GET $resource_address ! -> exiting"
      exit 1
      ;;
  esac
}
# Update an existing ISM policy in place. Non-fatal if the policy cannot be fetched or updated.
#
# Arguments:
#   $1 - resource address of the policy, appended to $ELASTICSEARCH_URL
#   $2 - name of the policy definition file under $INDEX_DEFINITIONS_ROOT
# Globals read:
#   CURL_ARGS, ELASTICSEARCH_URL, INDEX_DEFINITIONS_ROOT, PREFIX, DUE_SHARDS, DUE_REPLICAS
# Globals written:
#   RESOURCE_STATUS - HTTP code of the GET, then of the PUT
# Files written under /tmp: current-<$2> (fetched policy), new-<$2> (rendered policy),
# response-<$2> (PUT response body).
# Requires: jq
function update_ism_policy {
  local resource_address="$1"
  local definition_name="$2"
  local current_policy_path="/tmp/current-$definition_name"
  local new_policy_path="/tmp/new-$definition_name"
  local response_path="/tmp/response-$definition_name"
  local seq_no primary_term

  # Get existing policy
  RESOURCE_STATUS=$(curl "${CURL_ARGS[@]}" -o "$current_policy_path" -w "%{http_code}\n" "$ELASTICSEARCH_URL/$resource_address")
  echo -e "\n>>> GET $resource_address response code is $RESOURCE_STATUS"
  if [[ "$RESOURCE_STATUS" != 200 ]]; then
    echo -e ">>> Could not get ISM policy $resource_address. Ignoring."
    return
  fi

  # The PUT below is conditional on these two values taken from the fetched policy.
  seq_no=$(jq -r '._seq_no' "$current_policy_path")
  primary_term=$(jq -r '._primary_term' "$current_policy_path")

  # Render the new policy. `tee` (without -a) truncates the file, so a rendering left over
  # from an earlier run is never concatenated with this one and sent as invalid JSON.
  sed -e "s/PREFIX/$PREFIX/g" \
      -e "s/DUE_SHARDS/$DUE_SHARDS/g" \
      -e "s/DUE_REPLICAS/$DUE_REPLICAS/g" \
      "$INDEX_DEFINITIONS_ROOT/$definition_name" \
    | tee "$new_policy_path"

  RESOURCE_STATUS=$(curl "${CURL_ARGS[@]}" -XPUT "$ELASTICSEARCH_URL/$resource_address?if_seq_no=$seq_no&if_primary_term=$primary_term" \
    -H 'Content-Type: application/json' -w "%{http_code}\n" -o "$response_path" --data "@$new_policy_path")
  echo -e "\n>>> PUT $resource_address response code is $RESOURCE_STATUS"
}
# Set up DataHub usage-event storage on a non-AWS Elasticsearch.
# Three resources are ensured, in this order, each via create_if_not_exists:
#   1. the ILM policy
#   2. the index template
#   3. the data stream itself (an indexing request would create it implicitly, but it is
#      not queryable before creation, which causes GMS to throw exceptions)
# The function's exit status is that of the last create_if_not_exists call.
function create_datahub_usage_event_datastream() {
  local usage_event="${PREFIX}datahub_usage_event"

  create_if_not_exists "_ilm/policy/${usage_event}_policy" policy.json
  create_if_not_exists "_index_template/${usage_event}_index_template" index_template.json
  create_if_not_exists "_data_stream/${usage_event}" "datahub_usage_event"
}
# Set up DataHub usage-event storage on AWS Elasticsearch OSS / OpenSearch.
# Steps, in order:
#   1. ensure the ISM policy exists; when it already existed (RESOURCE_STATUS left at 200 by
#      create_if_not_exists), refresh it through update_ism_policy
#   2. ensure the index template exists
#   3. ensure the first rollover index `<PREFIX>datahub_usage_event-000001` exists, after
#      dropping a plain `<PREFIX>datahub_usage_event` index that is not a rollover alias
# Globals written: INDEX_SUFFIX, USAGE_EVENT_STATUS, USAGE_EVENT_DEFINITION
# (plus whatever the called helpers set).
function create_datahub_usage_event_aws_elasticsearch() {
  local policy_address="_opendistro/_ism/policies/${PREFIX}datahub_usage_event_policy"
  local usage_event_url="$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event"

  # 1. ISM policy, and 1.1 its update when it was already there
  create_if_not_exists "$policy_address" aws_es_ism_policy.json
  if [ "$RESOURCE_STATUS" -eq 200 ]; then
    update_ism_policy "$policy_address" aws_es_ism_policy.json
  fi

  # 2. index template
  create_if_not_exists "_template/${PREFIX}datahub_usage_event_index_template" aws_es_index_template.json

  # 3. event index datahub_usage_event-000001
  #    (AWS *rollover* indices need to use the `^.*-\d+$` naming pattern)
  #    -> https://aws.amazon.com/premiumsupport/knowledge-center/opensearch-failed-rollover-index/
  INDEX_SUFFIX="000001"

  # Before creating it, check whether `datahub_usage_event` was already auto-created by GMS
  # ahead of `datahub_usage_event-000001` (the common case when this script was first run
  # without `USE_AWS_ELASTICSEARCH` set to `true`)
  # -> https://github.com/datahub-project/datahub/issues/5376
  USAGE_EVENT_STATUS=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}\n" "$usage_event_url")
  if [ "$USAGE_EVENT_STATUS" -eq 200 ]; then
    USAGE_EVENT_DEFINITION=$(curl "${CURL_ARGS[@]}" "$usage_event_url")
    case "$USAGE_EVENT_DEFINITION" in
      *"datahub_usage_event-"*)
        # definition mentions the suffixed index, as expected -> keep it
        ;;
      *)
        # otherwise it is invalid: drop it so that it gets recreated below
        echo -e "\n>>> deleting invalid datahub_usage_event ..."
        curl "${CURL_ARGS[@]}" -XDELETE "$usage_event_url"
        ;;
    esac
  fi

  # ... now it is safe to create the index
  create_if_not_exists "${PREFIX}datahub_usage_event-$INDEX_SUFFIX" aws_es_index.json
}
# Entry point.
# - Analytics enabled: create the full set of usage-event resources, using the AWS-compatible
#   variant unless USE_AWS_ELASTICSEARCH is "false"; any failure exits with status 1.
# - Analytics disabled: only make sure a plain `<PREFIX>datahub_usage_event` index exists.
if [[ "$DATAHUB_ANALYTICS_ENABLED" == true ]]; then
  echo -e "\n datahub_analytics_enabled: $DATAHUB_ANALYTICS_ENABLED"
  if [[ "$USE_AWS_ELASTICSEARCH" == false ]]; then
    create_datahub_usage_event_datastream || exit 1
  else
    create_datahub_usage_event_aws_elasticsearch || exit 1
  fi
else
  echo -e "\ndatahub_analytics_enabled: $DATAHUB_ANALYTICS_ENABLED"
  DATAHUB_USAGE_EVENT_INDEX_RESPONSE_CODE=$(curl "${CURL_ARGS[@]}" -o /dev/null -w "%{http_code}" "$ELASTICSEARCH_URL/_cat/indices/${PREFIX}datahub_usage_event")
  case "$DATAHUB_USAGE_EVENT_INDEX_RESPONSE_CODE" in
    404)
      echo -e "\ncreating ${PREFIX}datahub_usage_event"
      curl "${CURL_ARGS[@]}" -XPUT "$ELASTICSEARCH_URL/${PREFIX}datahub_usage_event"
      ;;
    200)
      echo -e "\n${PREFIX}datahub_usage_event exists"
      ;;
    403)
      # Actually stop here, as the message says: previously the script fell through and
      # finished with status 0 even though nothing could be checked or created.
      echo -e "Forbidden so exiting"
      exit 1
      ;;
    *)
      # Any other code used to be ignored silently; keep it non-fatal but make it visible.
      echo "unexpected response code '$DATAHUB_USAGE_EVENT_INDEX_RESPONSE_CODE' while checking ${PREFIX}datahub_usage_event; nothing created" >&2
      ;;
  esac
fi