diff --git a/docker-compose.yml b/docker-compose.yml index af55be00c..ec9adda16 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -452,6 +452,7 @@ services: restart: ${DOCKER__RESTART_POLICY:-unless-stopped} volumes: - ./cache/synpuf1k:/app/synpuf1k + - ./cache/vocab/:/app/vocab - ./cache/drivers/ngdbc-latest.jar:/app/inst/drivers/ngdbc-latest.jar - ./cache/drivers/ngdbc-latest.jar:/app/inst/libraries/ngdbc-latest.jar - duckdb-data-1:/app/duckdb_data @@ -1696,14 +1697,6 @@ services: DUCKDB__DATA_FOLDER: /home/docker/duckdb_data LOCAL_DEBUG: "false" - alp-dataflow-gen-data-load-agent: - container_name: &c3 alp-dataflow-gen-data-load-agent-${BASE_PORT:-1} - extends: alp-dataflow-gen-agent - volumes: - - ./cache/vocab/:/tmp/data/vocab - - ./cache/synpuf1k/:/tmp/data/synpuf1k - profiles: [data-load] - alp-perseus-files-manager: image: perseushub/files-manager:latest container_name: files-manager diff --git a/docs/2-load/6-load-synpuf1k.md b/docs/2-load/6-load-synpuf1k.md index 9add91f8b..231b8e63f 100644 --- a/docs/2-load/6-load-synpuf1k.md +++ b/docs/2-load/6-load-synpuf1k.md @@ -94,17 +94,10 @@ yarn create-postgres-cdm-schemas alpdev_pg cdmdefault cdmvocab - Run the following commands to seed postgres cdm schemas with synpuf-1k ```bash -yarn start:data-load - -docker stop alp-dataflow-gen-agent-1 - -docker exec -it alp-dataflow-gen-data-load-agent-1 prefect deployment run data-load-plugin/data-load-plugin_deployment --param options='{"files":[{"name": "Location","path": "/tmp/data/synpuf1k/002_LOCATION.csv", "truncate": "True", "table_name": "location"},{"name": "CARE_SITE","path": "/tmp/data/synpuf1k/003_CARE_SITE.csv", "truncate": "True", "table_name": "care_site"},{"name": "Provider","path": "/tmp/data/synpuf1k/004_PROVIDER.csv", "truncate": "True", "table_name": "provider"},{"name": "Cost","path": "/tmp/data/synpuf1k/005_COST.csv", "truncate": "True", "table_name": "cost"},{"name": "Person","path": "/tmp/data/synpuf1k/006_PERSON.csv", "truncate": 
"True", "table_name": "person"},{"name": "Death","path": "/tmp/data/synpuf1k/007_DEATH.csv", "truncate": "True", "table_name": "death"},{"name": "Condition_Occirence","path": "/tmp/data/synpuf1k/008_CONDITION_OCCURRENCE.csv", "truncate": "True", "table_name": "condition_occurrence"},{"name": "Condition_Era","path": "/tmp/data/synpuf1k/009_CONDITION_ERA.csv", "truncate": "True", "table_name": "condition_era"},{"name": "Device_Exposure","path": "/tmp/data/synpuf1k/010_DEVICE_EXPOSURE.csv", "truncate": "True", "table_name": "device_exposure"},{"name": "Drug_Exposure","path": "/tmp/data/synpuf1k/011_DRUG_EXPOSURE.csv", "truncate": "True", "table_name": "drug_exposure"},{"name": "Drug_Era","path": "/tmp/data/synpuf1k/012_DRUG_ERA.csv", "truncate": "True", "table_name": "drug_era"},{"name": "Measurement","path": "/tmp/data/synpuf1k/013_MEASUREMENT.csv", "truncate": "True", "table_name": "measurement"},{"name": "Observation","path": "/tmp/data/synpuf1k/014_OBSERVATION.csv", "truncate": "True", "table_name": "observation"},{"name": "Observation_Period","path": "/tmp/data/synpuf1k/015_OBSERVATION_PERIOD.csv", "truncate": "True", "table_name": "observation_period"},{"name": "Payer_Plan_Period","path": "/tmp/data/synpuf1k/016_PAYER_PLAN_PERIOD.csv", "truncate": "True", "table_name": "payer_plan_period"},{"name": "Procedure_Occurrence","path": "/tmp/data/synpuf1k/017_PROCEDURE_OCCURRENCE.csv", "truncate": "True", "table_name": "procedure_occurrence"},{"name": "Visit_Occurrence","path": "/tmp/data/synpuf1k/018_VISIT_OCCURRENCE.csv", "truncate": "True", "table_name": "visit_occurrence"}],"schema_name":"cdmdefault","header":"true","delimiter":",","database_code": "alpdev_pg", "chunksize": "50000", "encoding": "utf_8"}' +docker exec -it alp-dataflow-gen-agent-1 prefect deployment run data-load-plugin/data-load-plugin_deployment --param options='{"files":[{"name": "Location","path": "/app/synpuf1k/002_LOCATION.csv", "truncate": "True", "table_name": "location"},{"name": 
"CARE_SITE","path": "/app/synpuf1k/003_CARE_SITE.csv", "truncate": "True", "table_name": "care_site"},{"name": "Provider","path": "/app/synpuf1k/004_PROVIDER.csv", "truncate": "True", "table_name": "provider"},{"name": "Cost","path": "/app/synpuf1k/005_COST.csv", "truncate": "True", "table_name": "cost"},{"name": "Person","path": "/app/synpuf1k/006_PERSON.csv", "truncate": "True", "table_name": "person"},{"name": "Death","path": "/app/synpuf1k/007_DEATH.csv", "truncate": "True", "table_name": "death"},{"name": "Condition_Occirence","path": "/app/synpuf1k/008_CONDITION_OCCURRENCE.csv", "truncate": "True", "table_name": "condition_occurrence"},{"name": "Condition_Era","path": "/app/synpuf1k/009_CONDITION_ERA.csv", "truncate": "True", "table_name": "condition_era"},{"name": "Device_Exposure","path": "/app/synpuf1k/010_DEVICE_EXPOSURE.csv", "truncate": "True", "table_name": "device_exposure"},{"name": "Drug_Exposure","path": "/app/synpuf1k/011_DRUG_EXPOSURE.csv", "truncate": "True", "table_name": "drug_exposure"},{"name": "Drug_Era","path": "/app/synpuf1k/012_DRUG_ERA.csv", "truncate": "True", "table_name": "drug_era"},{"name": "Measurement","path": "/app/synpuf1k/013_MEASUREMENT.csv", "truncate": "True", "table_name": "measurement"},{"name": "Observation","path": "/app/synpuf1k/014_OBSERVATION.csv", "truncate": "True", "table_name": "observation"},{"name": "Observation_Period","path": "/app/synpuf1k/015_OBSERVATION_PERIOD.csv", "truncate": "True", "table_name": "observation_period"},{"name": "Payer_Plan_Period","path": "/app/synpuf1k/016_PAYER_PLAN_PERIOD.csv", "truncate": "True", "table_name": "payer_plan_period"},{"name": "Procedure_Occurrence","path": "/app/synpuf1k/017_PROCEDURE_OCCURRENCE.csv", "truncate": "True", "table_name": "procedure_occurrence"},{"name": "Visit_Occurrence","path": "/app/synpuf1k/018_VISIT_OCCURRENCE.csv", "truncate": "True", "table_name": "visit_occurrence"}],"schema_name":"cdmdefault","header":"true","delimiter":",","database_code": 
"alpdev_pg", "chunksize": "50000", "encoding": "utf_8"}' ``` -- Docker container logs can be checked with the bash command `docker logs --tail 100 alp-dataflow-gen-data-load-agent-1` -- Once the flow is completed, the container logs the message "Finished in state Completed()". After which run the following commands to stop the data-load agent and start dataflow-gen-agent -```bash -docker stop alp-dataflow-gen-data-load-agent-1 -docker start alp-dataflow-gen-agent-1 -``` +- Docker container logs can be checked with the bash command `docker logs --tail 100 alp-dataflow-gen-agent-1` +- Once the flow is completed, the container logs the message "Finished in state Completed()" + - Confirm data loaded with ``` diff --git a/docs/2-load/7-load-vocab.md b/docs/2-load/7-load-vocab.md index b899991db..9ae0ce9c7 100644 --- a/docs/2-load/7-load-vocab.md +++ b/docs/2-load/7-load-vocab.md @@ -51,19 +51,10 @@ wc -l *.csv | sort -- Run the following command in terminal to stop an alp docker container and start another container to load data +- Run the following command in terminal to load the vocabulary data ```bash -yarn start:data-load - -docker stop alp-dataflow-gen-agent-1 - -docker exec -it alp-dataflow-gen-data-load-agent-1 prefect deployment run data-load-plugin/data-load-plugin_deployment --param options='{"files":[{"name": "CONCEPT_ANCESTOR","path": "/tmp/data/vocab/CONCEPT_ANCESTOR.csv", "truncate": "True", "table_name": "concept_ancestor"},{"name": "CONCEPT_CLASS","path": "/tmp/data/vocab/CONCEPT_CLASS.csv", "truncate": "True", "table_name": "concept_class"},{"name": "CONCEPT_RELATIONSHIP","path": "/tmp/data/vocab/CONCEPT_RELATIONSHIP.csv", "truncate": "True", "table_name": "concept_relationship"},{"name": "CONCEPT_SYNONYM","path": "/tmp/data/vocab/CONCEPT_SYNONYM.csv", "truncate": "True", "table_name": "concept_synonym"},{"name": "CONCEPT","path": "/tmp/data/vocab/CONCEPT.csv", "truncate": "True", "table_name": "concept"},{"name": "DOMAIN","path": "/tmp/data/vocab/DOMAIN.csv", "truncate": "True", "table_name": "domain"},{"name":
"DRUG_STRENGTH","path": "/tmp/data/vocab/DRUG_STRENGTH.csv", "truncate": "True", "table_name": "drug_strength"},{"name": "RELATIONSHIP","path": "/tmp/data/vocab/RELATIONSHIP.csv", "truncate": "True", "table_name": "relationship"},{"name": "VOCABULARY","path": "/tmp/data/vocab/VOCABULARY.csv", "truncate": "True", "table_name": "vocabulary"}],"schema_name":"cdmvocab","header":"true","delimiter":"\t","database_code": "alpdev_pg", "chunksize": "50000", "encoding": "utf_8"}' -``` -- Docker container logs can be checked with the bash command `docker logs --tail 100 alp-dataflow-gen-data-load-agent-1` -- Once the flow is completed, the container logs the message "Finished in state Completed()". After which run the following commands to stop the data-load agent and start dataflow-gen-agent - -```bash -docker stop alp-dataflow-gen-data-load-agent-1 -docker start alp-dataflow-gen-agent-1 +docker exec -it alp-dataflow-gen-agent-1 prefect deployment run data-load-plugin/data-load-plugin_deployment --param options='{"files":[{"name": "CONCEPT_ANCESTOR","path": "/app/vocab/CONCEPT_ANCESTOR.csv", "truncate": "True", "table_name": "concept_ancestor"},{"name": "CONCEPT_CLASS","path": "/app/vocab/CONCEPT_CLASS.csv", "truncate": "True", "table_name": "concept_class"},{"name": "CONCEPT_RELATIONSHIP","path": "/app/vocab/CONCEPT_RELATIONSHIP.csv", "truncate": "True", "table_name": "concept_relationship"},{"name": "CONCEPT_SYNONYM","path": "/app/vocab/CONCEPT_SYNONYM.csv", "truncate": "True", "table_name": "concept_synonym"},{"name": "CONCEPT","path": "/app/vocab/CONCEPT.csv", "truncate": "True", "table_name": "concept"},{"name": "DOMAIN","path": "/app/vocab/DOMAIN.csv", "truncate": "True", "table_name": "domain"},{"name": "DRUG_STRENGTH","path": "/app/vocab/DRUG_STRENGTH.csv", "truncate": "True", "table_name": "drug_strength"},{"name": "RELATIONSHIP","path": "/app/vocab/RELATIONSHIP.csv", "truncate": "True", "table_name": "relationship"},{"name": "VOCABULARY","path": 
"/app/vocab/VOCABULARY.csv", "truncate": "True", "table_name": "vocabulary"}],"schema_name":"cdmvocab","header":"true","delimiter":"\t","database_code": "alpdev_pg", "chunksize": "50000", "encoding": "utf_8"}' ``` +- Docker container logs can be checked with the bash command `docker logs --tail 100 alp-dataflow-gen-agent-1` +- Once the flow is completed, the container logs the message "Finished in state Completed()" - note: expected output is > COPY ${LINE_COUNT} diff --git a/package.json b/package.json index cf37fbfc0..6a69abe0b 100644 --- a/package.json +++ b/package.json @@ -47,7 +47,6 @@ "tsc": "ts-node internal/build/buildservices.ts build --only-tsc", "inittestdb": "node internal/build/initdb.js test", "removetestdb": "node internal/build/initdb.js rmonly", - "start:data-load": "yarn local:minerva --profile data-load up --wait", "internal": "npm run-script --prefix internal" }, "devDependencies": {