diff --git a/.bumpversion.cfg b/.bumpversion.cfg index b8e02a7c7..0b6a3557a 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.25.6 +current_version = 1.25.7 commit = True tag = False tag_name = {new_version} @@ -30,11 +30,11 @@ search = {current_version} replace = {new_version} [bumpversion:file:RELEASE.txt] -search = {current_version} 2023-04-20T13:51:49Z +search = {current_version} 2023-04-20T14:15:34Z replace = {new_version} {utcnow:%Y-%m-%dT%H:%M:%SZ} [bumpversion:part:releaseTime] -values = 2023-04-20T13:51:49Z +values = 2023-04-20T14:15:34Z [bumpversion:file(version):birdhouse/config/canarie-api/docker_configuration.py.template] search = 'version': '{current_version}' diff --git a/CHANGES.md b/CHANGES.md index 874e813f9..0ee26f17f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -17,6 +17,20 @@ [//]: # (list changes here, using '-' for each new entry, remove this when items are added) +[1.25.7](https://github.com/bird-house/birdhouse-deploy/tree/1.25.7) (2023-04-20) +------------------------------------------------------------------------------------------------------------------ + +## Fixes + +- Fix flaky WPS provider responses (i.e.: other WPS birds) causing failure during their registration in `weaver`. + + In some cases, the WPS birds would not respond properly when starting the stack, either because they are still + initializing or due to other temporary failures such as services being restarted until healthy. This fix introduces + a retry mechanism to attempt WPS registration in `weaver` up to `WEAVER_WPS_PROVIDERS_RETRY_COUNT=5` times + (1 initial attempt + 5 retries), and with `WEAVER_WPS_PROVIDERS_RETRY_AFTER=5` second intervals between each retry. + If the maximum number of retries for any WPS provider or the `WEAVER_WPS_PROVIDERS_MAX_TIME` across all registrations + is reached, the operation is aborted. 
+ [1.25.6](https://github.com/bird-house/birdhouse-deploy/tree/1.25.6) (2023-04-20) ------------------------------------------------------------------------------------------------------------------ diff --git a/Makefile b/Makefile index b6d17e4c5..178e0bc01 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Generic variables override SHELL := bash override APP_NAME := birdhouse-deploy -override APP_VERSION := 1.25.6 +override APP_VERSION := 1.25.7 # utility to remove comments after value of an option variable override clean_opt = $(shell echo "$(1)" | $(_SED) -r -e "s/[ '$'\t'']+$$//g") diff --git a/README.rst b/README.rst index 587cdf4fd..84d9ef1c2 100644 --- a/README.rst +++ b/README.rst @@ -14,13 +14,13 @@ for a full-fledged production platform. * - releases - | |latest-version| |commits-since| -.. |commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/1.25.6.svg +.. |commits-since| image:: https://img.shields.io/github/commits-since/bird-house/birdhouse-deploy/1.25.7.svg :alt: Commits since latest release - :target: https://github.com/bird-house/birdhouse-deploy/compare/1.25.6...master + :target: https://github.com/bird-house/birdhouse-deploy/compare/1.25.7...master -.. |latest-version| image:: https://img.shields.io/badge/tag-1.25.6-blue.svg?style=flat +.. |latest-version| image:: https://img.shields.io/badge/tag-1.25.7-blue.svg?style=flat :alt: Latest Tag - :target: https://github.com/bird-house/birdhouse-deploy/tree/1.25.6 + :target: https://github.com/bird-house/birdhouse-deploy/tree/1.25.7 .. 
|readthedocs| image:: https://readthedocs.org/projects/birdhouse-deploy/badge/?version=latest :alt: ReadTheDocs Build Status (latest version) diff --git a/RELEASE.txt b/RELEASE.txt index 7e13f18f9..c123b0c56 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -1 +1 @@ -1.25.6 2023-04-20T13:51:49Z +1.25.7 2023-04-20T14:15:34Z diff --git a/birdhouse/components/weaver/default.env b/birdhouse/components/weaver/default.env index f608be972..bb3e66835 100644 --- a/birdhouse/components/weaver/default.env +++ b/birdhouse/components/weaver/default.env @@ -27,6 +27,8 @@ EXTRA_VARS=' $WEAVER_MANAGER_LOG_LEVEL $WEAVER_WORKER_LOG_LEVEL $WEAVER_WPS_PROVIDERS_MAX_TIME + $WEAVER_WPS_PROVIDERS_RETRY_COUNT + $WEAVER_WPS_PROVIDERS_RETRY_AFTER ' # extend the original 'VARS' from 'birdhouse/pavics-compose.sh' to employ them for template substitution # adding them to 'VARS', they will also be validated in case of override of 'default.env' using 'env.local' @@ -87,8 +89,12 @@ export WEAVER_WPS_WORKDIR="/tmp/wps_workdir/weaver" export WEAVER_MANAGER_LOG_LEVEL=INFO export WEAVER_WORKER_LOG_LEVEL=INFO -# control maximum timeout to abandon registration (duration in seconds) +# control maximum timeout to abandon registration (duration in seconds, across whole procedure) export WEAVER_WPS_PROVIDERS_MAX_TIME=120 +# control maximum retries to abandon registration (retries per provider) +export WEAVER_WPS_PROVIDERS_RETRY_COUNT=5 +# control interval time between retries (duration in seconds, counts toward maximum timeout) +export WEAVER_WPS_PROVIDERS_RETRY_AFTER=5 export DELAYED_EVAL=" $DELAYED_EVAL diff --git a/birdhouse/components/weaver/post-docker-compose-up b/birdhouse/components/weaver/post-docker-compose-up index 794a60973..10de53a85 100755 --- a/birdhouse/components/weaver/post-docker-compose-up +++ b/birdhouse/components/weaver/post-docker-compose-up @@ -25,7 +25,11 @@ # list of provider names (comma or space delimited), all are assumed to be available at # 
"https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/" # WEAVER_WPS_PROVIDERS_MAX_TIME: -# limit script execution up to maximum this number of seconds +# limit script execution up to a maximum of this number of seconds +# WEAVER_WPS_PROVIDERS_RETRY_COUNT: +# number of permitted retries to register a given WPS provider +# WEAVER_WPS_PROVIDERS_RETRY_AFTER: +# number of seconds between each retry request as needed # # Following configurations are expected to be inherited from bird-house/weaver-component env.local/default.env: # - MAGPIE_ADMIN_USERNAME @@ -62,13 +66,6 @@ reset_state() { set +vx; eval "${old_state}" } -echo "Running: $0" - -MAGPIE_URL="https://${PAVICS_FQDN_PUBLIC}/magpie" -WEAVER_URL="https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}" -WEAVER_WPS_PROVIDERS_MAX_TIME=${WEAVER_WPS_PROVIDERS_MAX_TIME:-120} -REQUEST_TIMEOUT=2 - # logging if [ ! -z "$TERM" ]; then YELLOW=${YELLOW:-$(tput setaf 3)} @@ -81,26 +78,47 @@ else fi PREFIX="[Weaver] " ERROR="${PREFIX}${RED}ERROR${NORMAL}: " -WARNING="${PREFIX}${YELLOW}WARNING${NORMAL}: " +WARN="${PREFIX}${YELLOW}WARNING${NORMAL}: " + +echo "${PREFIX}Running: $0" + +MAGPIE_URL="https://${PAVICS_FQDN_PUBLIC}/magpie" +WEAVER_URL="https://${PAVICS_FQDN_PUBLIC}${TWITCHER_PROTECTED_PATH}/${WEAVER_MANAGER_NAME}" +WEAVER_WPS_PROVIDERS_MAX_TIME=${WEAVER_WPS_PROVIDERS_MAX_TIME:-120} +WEAVER_WPS_PROVIDERS_RETRY_AFTER=${WEAVER_WPS_PROVIDERS_RETRY_AFTER:-5} +WEAVER_WPS_PROVIDERS_RETRY_COUNT=${WEAVER_WPS_PROVIDERS_RETRY_COUNT:-5} +# double echo and no quotes used on purpose to remove empty/extra newlines/spaces +WEAVER_WPS_PROVIDERS=$(echo $(echo "${WEAVER_WPS_PROVIDERS}" | tr ',' ' ')) +REQUEST_TIMEOUT=2 if [ -z "${WEAVER_WPS_PROVIDERS}" ]; then - echo "${WARNING}Nothing specified in WEAVER_WPS_PROVIDERS to register WPS remote providers." + echo "${WARN}Nothing specified in WEAVER_WPS_PROVIDERS to register WPS remote providers." 
reset_state exit 0 fi -echo " Requested Weaver WPS providers: [${WEAVER_WPS_PROVIDERS}]" -echo " Will retry requests at most for ${WEAVER_WPS_PROVIDERS_MAX_TIME}s" +if [ "${WEAVER_WPS_PROVIDERS_RETRY_COUNT}" -lt 0 ]; then + WEAVER_WPS_PROVIDERS_RETRY_AFTER=0 + WEAVER_WPS_PROVIDERS_RETRY_COUNT=0 +fi +if [ "${WEAVER_WPS_PROVIDERS_RETRY_AFTER}" -lt 0 ]; then + WEAVER_WPS_PROVIDERS_RETRY_AFTER=0 +fi + +echo "${PREFIX}Requested Weaver WPS providers: [${WEAVER_WPS_PROVIDERS}]" +echo "${PREFIX}Will retry requests at most for ${WEAVER_WPS_PROVIDERS_MAX_TIME}s" +echo "${PREFIX}Will retry registration of each provider up to ${WEAVER_WPS_PROVIDERS_RETRY_COUNT} times" +echo "${PREFIX}Will retry registration of each provider with ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s intervals" if [ -z "$WEAVER_CURL_IMAGE" ]; then WEAVER_CURL_IMAGE="curlimages/curl:7.87.0" fi # POSIX portable RNG if RANDOM does not exist on the current shell -RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom | head -c 5)} +RANDOM_NUMBER=${RANDOM:-$(tr -dc 0-9 < /dev/urandom 2>/dev/null | head -c 5)} # To know when a docker run was started in case it hangs. -DOCKER_RUN_TAG="weaver_post_curl_`date -Isecond | sed 's/:/_/g' | sed 's/+/p/g'`_${RANDOM_NUMBER}" +DOCKER_RUN_TAG="weaver_post_curl_$(date -Isecond | sed 's/:/_/g' | sed 's/+/p/g')_${RANDOM_NUMBER}" curl_cmd() { docker run --rm --name "${DOCKER_RUN_TAG}" "${WEAVER_CURL_IMAGE}" "$@" } @@ -160,7 +178,7 @@ if [ -z "${cookie}" ]; then fi # validate that Magpie token retrieved is adequate -printf "Validate Magpie token..." +printf "%s" "${PREFIX}Validate Magpie token..." resp=$( \ curl_cmd --insecure --silent --location \ -m ${REQUEST_TIMEOUT} \ @@ -220,9 +238,6 @@ while true; do printf "." 
done -# parse providers -WEAVER_WPS_PROVIDERS="$(echo "${WEAVER_WPS_PROVIDERS}" | tr ',' ' ')" - # move on to actual registration of WPS providers echo "${PREFIX}Using URL: [${WEAVER_URL}]" start_time="$(date -u +%s)" @@ -268,37 +283,53 @@ for prov in ${WEAVER_WPS_PROVIDERS}; do printf "." done - # unregister in case of multiple up/down to regenerate from scratch, don't care if NotFound returned - echo "${PREFIX}Unregistering any remote WPS provider matching [${prov}]." - curl_cmd --insecure --silent --location \ - -m ${REQUEST_TIMEOUT} \ - -w "${PREFIX}Delete [${prov}] response: %{http_code}" -o /dev/null \ - -b "${cookie}" \ - -X DELETE \ - "${WEAVER_URL}/providers/${prov}" - - # register the new provider and validate - printf "\n%s" "${PREFIX}Registering remote WPS provider [${prov}] on [${prov_url}]... " - resp=$( \ + retry=0 + retry_msg="" + total=${WEAVER_WPS_PROVIDERS_RETRY_COUNT} + while true; do + if [ ${retry} -ne 0 ]; then + retry_msg=" (retry: ${retry}/${total})" + fi + # unregister in case of multiple up/down to regenerate from scratch, don't care if NotFound returned + echo "${PREFIX}Unregistering any remote WPS provider matching [${prov}]${retry_msg}." curl_cmd --insecure --silent --location \ -m ${REQUEST_TIMEOUT} \ - -w "\n%{http_code}" \ + -w "${PREFIX}Delete [${prov}] response: %{http_code}${retry_msg}" -o /dev/null \ -b "${cookie}" \ - -H "Content-Type: application/json" \ - -X POST \ - -d "{\"id\": \"${prov}\", \"url\": \"${prov_url}\"}" \ - "${WEAVER_URL}/providers" \ - ) - ret=$? - code=$(echo "${resp}" | tail -n -1) - body=$(echo "${resp}" | head -n -1) - if [ ${ret} -ne 0 ] || [ "${code}" -ne 201 ]; then - printf "\n%s\n" "${ERROR}Failed registration of remote WPS provider [${prov}] on [${prov_url}]." - printf "Error:\n%s\n" "${body}" - reset_state - exit 23 - fi - echo "OK!" 
+ -X DELETE \ + "${WEAVER_URL}/providers/${prov}" + + # register the new provider and validate + printf "\n%s" "${PREFIX}Registering remote WPS provider [${prov}] on [${prov_url}]${retry_msg}... " + resp=$( \ + curl_cmd --insecure --silent --location \ + -m ${REQUEST_TIMEOUT} \ + -w "\n%{http_code}" \ + -b "${cookie}" \ + -H "Content-Type: application/json" \ + -X POST \ + -d "{\"id\": \"${prov}\", \"url\": \"${prov_url}\"}" \ + "${WEAVER_URL}/providers" \ + ) + ret=$? + code=$(echo "${resp}" | tail -n -1) + body=$(echo "${resp}" | head -n -1) + if [ ${ret} -ne 0 ] || [ "${code}" -ne 201 ]; then + printf "\n%s\n" "${WARN}Failed registration of remote WPS provider [${prov}] on [${prov_url}]${retry_msg}." + printf "Error:\n%s\n" "${body}" + if [ ${retry} -ge ${total} ]; then # abort once every permitted retry has been used + echo "${ERROR}Maximum retry attempts ${total} reached for WPS provider [${prov}]. Aborting." + reset_state + exit 23 + fi + echo "${WARN}Will retry after ${WEAVER_WPS_PROVIDERS_RETRY_AFTER}s..." + sleep ${WEAVER_WPS_PROVIDERS_RETRY_AFTER} + retry=$((retry+1)) + else + echo "OK!" # displayed on same line after first registration printf + break + fi + done done echo "${PREFIX}All Weaver remote WPS providers registered successfully!" @@ -317,10 +348,10 @@ ${PAVICS_COMPOSE} exec weaver-worker bash "${CELERY_HEALTHCHECK}" | tee "${PAVIC ret_worker=$? out_worker=$(cat "${PAVICS_LOG_DIR}/weaver-worker.log" | tail -n 1 | grep -c "ERROR") if [ ${ret_weaver} -ne 0 ] || [ ${ret_worker} -ne 0 ] || [ "${out_weaver}" -ne 0 ] || [ "${out_worker}" -ne 0 ]; then - echo "Weaver WebApp and/or Worker Celery tasks were not ready. Restarting both..." + echo "${PREFIX}Weaver WebApp and/or Worker Celery tasks were not ready. Restarting both..." ${PAVICS_COMPOSE} restart weaver weaver-worker else - echo "Weaver WebApp and/or Worker Celery tasks are both ready." + echo "${PREFIX}Weaver WebApp and/or Worker Celery tasks are both ready." 
fi reset_state diff --git a/birdhouse/config/canarie-api/docker_configuration.py.template b/birdhouse/config/canarie-api/docker_configuration.py.template index 7847536c0..9f4d7d40d 100644 --- a/birdhouse/config/canarie-api/docker_configuration.py.template +++ b/birdhouse/config/canarie-api/docker_configuration.py.template @@ -17,8 +17,8 @@ SERVICES = { 'info': { 'name': 'Node', 'synopsis': 'Nodes are data, compute and index endpoints accessed through the PAVICS platform or external clients. The Node service is the backend that allows: data storage, harvesting, indexation and discovery of local and federated data; authentication and authorization; server registration and management. Node service is therefore composed of several other services.', - 'version': '1.25.6', - 'releaseTime': '2023-04-20T13:51:49Z', + 'version': '1.25.7', + 'releaseTime': '2023-04-20T14:15:34Z', 'institution': 'Ouranos', 'researchSubject': 'Climatology', 'supportEmail': '${SUPPORT_EMAIL}', @@ -47,8 +47,8 @@ PLATFORMS = { 'info': { 'name': 'PAVICS', 'synopsis': 'The PAVICS (Power Analytics for Visualization of Climate Science) platform is a collection of climate analysis services served through Open Geospatial Consortium (OGC) protocols. These services include data access, processing and visualization. Both data and algorithms can be accessed either programmatically, through OGC-compliant clients such as QGIS or ArcGIS, or a custom web interface.', - 'version': '1.25.6', - 'releaseTime': '2023-04-20T13:51:49Z', + 'version': '1.25.7', + 'releaseTime': '2023-04-20T14:15:34Z', 'institution': 'Ouranos', 'researchSubject': 'Climatology', 'supportEmail': '${SUPPORT_EMAIL}', diff --git a/docs/source/conf.py b/docs/source/conf.py index f18c4cb85..af6c5cf75 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -69,9 +69,9 @@ # built documents. # # The short X.Y version. -version = '1.25.6' +version = '1.25.7' # The full version, including alpha/beta/rc tags. 
-release = '1.25.6' +release = '1.25.7' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages.