From 07b7f99bc3841da3c9a6b8055c88bd38812cb5b1 Mon Sep 17 00:00:00 2001 From: melissacline Date: Wed, 11 Oct 2023 19:19:40 -0700 Subject: [PATCH 1/2] Moved the launching of seqrepo into one place. Added functionality in appendVRids to restart seqrepo when the connection is lost --- pipeline/Makefile | 3 ++- pipeline/utilities/launch_seqrepo.sh | 11 +++++++++++ pipeline/vr/appendVRIds.py | 11 ++++++++++- pipeline/vr/appendvrids.sh | 14 ++++---------- pipeline/workflow/CompileVCFFiles.py | 3 +-- 5 files changed, 28 insertions(+), 14 deletions(-) create mode 100755 pipeline/utilities/launch_seqrepo.sh diff --git a/pipeline/Makefile b/pipeline/Makefile index b2cfd187b..b01d1580a 100755 --- a/pipeline/Makefile +++ b/pipeline/Makefile @@ -47,7 +47,8 @@ start-local-uta: ## starting local uta docker container [ `docker ps -f name=$(UTA_CONTAINER) | wc -l` -gt 1 ] || docker run -dit --name $(UTA_CONTAINER) -p $(UTA_PORT):5432 $(UTA_DOCKER_IMAGE) start-seqrepo-rest-service: - [ `docker ps -f name="seqrepo-rest-service" | wc -l` -gt 1 ] || docker run --name seqrepo-rest-service --detach --rm -p 5000:5000 -v $(SEQ_REPO_DIR):/usr/local/share/seqrepo/ biocommons/seqrepo-rest-service /usr/local/share/seqrepo + [ `docker ps -f name="seqrepo-rest-service" | wc -l` -gt 1 ] || \ + utilities/launch_seqrepo.sh .ONESHELL: setup-files: ## setup various directories to run pipeline diff --git a/pipeline/utilities/launch_seqrepo.sh b/pipeline/utilities/launch_seqrepo.sh new file mode 100755 index 000000000..f4b27f2b3 --- /dev/null +++ b/pipeline/utilities/launch_seqrepo.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +SEQ_REPO_DIR="${1:-/usr/local/share/seqrepo/latest}" + +# +# Launch the seqrepo rest API docker container +docker run --name seqrepo-rest-service \ + --detach --rm -p 5000:5000 \ + -v ${SEQ_REPO_DIR}:/mnt/seqrepo \ + biocommons/seqrepo-rest-service \ + /mnt/seqrepo diff --git a/pipeline/vr/appendVRIds.py b/pipeline/vr/appendVRIds.py index 7e884d32c..e7a8b27b4 100644 --- 
a/pipeline/vr/appendVRIds.py +++ b/pipeline/vr/appendVRIds.py @@ -6,6 +6,8 @@ import csv import socket import urllib3 +import subprocess +import time from ga4gh.core import sha512t24u, ga4gh_digest, ga4gh_identify, ga4gh_serialize from ga4gh.vrs import __version__, models, normalize @@ -86,9 +88,16 @@ def get_vrs_id(hgvs, max_repeats=5): continue except requests.exceptions.ReadTimeout: continue + except requests.exceptions.ConnectionError: + print("Launching seqrepo") + subprocess.Popen("launch_seqrepo.sh") + time.sleep(3) else: allele_dict = allele.as_dict() - return(allele_dict['_id']) + if 'id' in allele_dict: + return(allele_dict['id']) + elif '_id' in allele_dict: + return(allele_dict['_id']) return '-' diff --git a/pipeline/vr/appendvrids.sh b/pipeline/vr/appendvrids.sh index 3bdae8229..f914ecd50 100755 --- a/pipeline/vr/appendvrids.sh +++ b/pipeline/vr/appendvrids.sh @@ -14,14 +14,7 @@ SEQ_REPO_DIR="${4:-/usr/local/share/seqrepo}" # ---------------------------------------------------- [ `docker ps -f name="seqrepo-rest-service" | wc -l` -gt 1 ] \ - || docker run \ - --name seqrepo-rest-service \ - --user=`id -u`:`id -g` \ - --detach --rm -p 5000:5000 \ - --network=host \ - -v ${SEQ_REPO_DIR}:/usr/local/share/seqrepo \ - biocommons/seqrepo-rest-service \ - seqrepo-rest-service /usr/local/share/seqrepo + || ../utilities/launch_seqrepo.sh ${SEQ_REPO_DIR} # ...and wait for it to be available @@ -31,5 +24,6 @@ SEQ_REPO_DIR="${4:-/usr/local/share/seqrepo}" # ---------------------------------------------------- # --- 2. 
execute append-vr-ids # ---------------------------------------------------- -python3 appendVRIds.py -i ${ARTIFACT_DIR}/${INPUT_FILE} \ - -o ${ARTIFACT_DIR}/${OUTPUT_FILE} +PATH=../utilities:${PATH} python3 appendVRIds.py \ + -i ${ARTIFACT_DIR}/${INPUT_FILE} \ + -o ${ARTIFACT_DIR}/${OUTPUT_FILE} diff --git a/pipeline/workflow/CompileVCFFiles.py b/pipeline/workflow/CompileVCFFiles.py index 32a26bc28..c035f0fc9 100644 --- a/pipeline/workflow/CompileVCFFiles.py +++ b/pipeline/workflow/CompileVCFFiles.py @@ -762,8 +762,7 @@ def run(self): artifacts_dir_host, 'built_with_priors_clean.tsv', 'built_with_vr_ids.tsv', - self.cfg.vr_docker_image_name, - self.cfg.seq_repo_dir + self.cfg.seq_repo_dir ] pipeline_utils.run_process(args) From 249dbaaea75ecfd64d3761facb65b5fba20b38a4 Mon Sep 17 00:00:00 2001 From: melissacline Date: Thu, 19 Oct 2023 14:42:58 -0700 Subject: [PATCH 2/2] Removed the code which attempted to relaunch the seqrepo rest service after it crashes. There is a fixed seqrepo rest service under test --- pipeline/vr/appendVRIds.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pipeline/vr/appendVRIds.py b/pipeline/vr/appendVRIds.py index e7a8b27b4..f5193ee94 100644 --- a/pipeline/vr/appendVRIds.py +++ b/pipeline/vr/appendVRIds.py @@ -17,7 +17,7 @@ csv.field_size_limit(10000000) SEQREPO_REST_SERVICE_URL = "http://localhost:5000/seqrepo" -#SEQREPO_REST_SERVICE_URL = "https://services.genomicmedlab.org/seqrepo" + DP = SeqRepoRESTDataProxy(base_url=SEQREPO_REST_SERVICE_URL) TLR = Translator(data_proxy=DP, translate_sequence_identifiers=True, @@ -88,10 +88,6 @@ def get_vrs_id(hgvs, max_repeats=5): continue except requests.exceptions.ReadTimeout: continue - except requests.exceptions.ConnectionError: - print("Launching seqrepo") - subprocess.Popen("launch_seqrepo.sh") - time.sleep(3) else: allele_dict = allele.as_dict() if 'id' in allele_dict: