Skip to content

Commit

Permalink
fix test resource script
Browse files Browse the repository at this point in the history
  • Loading branch information
rcannood committed Aug 27, 2024
1 parent 9fcb4db commit 9877b68
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 112 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
*.egg-info
__pycache__/
.ipynb_checkpoints
results/
/results/
/temp/
.snakemake

logs/*
Expand Down
2 changes: 1 addition & 1 deletion common
33 changes: 19 additions & 14 deletions scripts/create_test_resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,34 @@ cd "$REPO_ROOT"
set -e

DATASET_ID="10x_xenium/2023_10x_mouse_brain_xenium"
RAW_OUT="resources/tmp_datasets_raw/$DATASET_ID"
RESOURCES_OUT="resources/datasets/10x_xenium/$DATASET_ID"
TMP_DIR="temp/datasets/$DATASET_ID"
OUT_DIR="resources_test/common/2023_10x_mouse_brain_xenium"

# https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_1/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs.zip
# https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_2/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs.zip
# https://cf.10xgenomics.com/samples/xenium/1.0.2/Xenium_V1_FF_Mouse_Brain_MultiSection_3/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs.zip


rep1="$RAW_OUT/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs"
rep2="$RAW_OUT/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs"
rep3="$RAW_OUT/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs"
rep1="$TMP_DIR/Xenium_V1_FF_Mouse_Brain_MultiSection_1_outs"
rep2="$TMP_DIR/Xenium_V1_FF_Mouse_Brain_MultiSection_2_outs"
rep3="$TMP_DIR/Xenium_V1_FF_Mouse_Brain_MultiSection_3_outs"

# Download and unpack the three Xenium replicates, skipping any replicate whose
# extracted directory already exists.
#
# The archive is unpacked into the same "<name>_outs" directory that the guard
# tests (the path $rep1..$rep3 point to). Extracting into "<name>" instead would
# leave the guarded directory missing, so every run would download again.
# NOTE(review): confirm the downstream loader expects the Xenium output files
# directly under "<name>_outs".
mkdir -p "$TMP_DIR"  # wget -O fails if the target directory does not exist

for i in 1 2 3; do
  name="Xenium_V1_FF_Mouse_Brain_MultiSection_${i}"
  if [ ! -d "$TMP_DIR/${name}_outs" ]; then
    wget "https://cf.10xgenomics.com/samples/xenium/1.0.2/${name}/${name}_outs.zip" \
      -O "$TMP_DIR/${name}_outs.zip"
    unzip "$TMP_DIR/${name}_outs.zip" -d "$TMP_DIR/${name}_outs"
  fi
done

# convert to zarr and concatenate
Expand All @@ -47,7 +47,7 @@ viash run src/data_loaders/download_10x_xenium/config.vsh.yaml -- \
--replicate_id rep1 \
--replicate_id rep2 \
--replicate_id rep3 \
--output $RAW_OUT/full_dataset.zarr \
--output $TMP_DIR/full_dataset.zarr \
--dataset_id "$DATASET_ID" \
--dataset_name "Xenium V1 Fresh Frozen Mouse Brain" \
--dataset_url "https://www.10xgenomics.com/datasets/fresh-frozen-mouse-brain-replicates-1-standard" \
Expand All @@ -57,8 +57,8 @@ viash run src/data_loaders/download_10x_xenium/config.vsh.yaml -- \

# crop the region
viash run src/data_processors/crop_region/config.vsh.yaml -- \
--input $RAW_OUT/full_dataset.zarr \
--output $RESOURCES_OUT/dataset.zarr \
--input "$TMP_DIR/full_dataset.zarr" \
--output "$OUT_DIR/dataset.zarr" \
--replicate_id "rep1" \
--min_x 10000 \
--max_x 12000 \
Expand All @@ -74,3 +74,8 @@ viash run src/data_processors/crop_region/config.vsh.yaml -- \
--max_x 12000 \
--min_y 10000 \
--max_y 12000

# Mirror the generated test resources to the shared S3 bucket.
# --delete removes remote files that no longer exist locally.
# --dryrun only prints the operations that would be performed; nothing is
# uploaded or deleted until that flag is removed.
aws s3 sync \
"resources_test/common/2023_10x_mouse_brain_xenium" \
"s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium" \
--delete --dryrun
1 change: 1 addition & 0 deletions src/data_processors/crop_region/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ arguments:
name: --output
required: true
description: The output file to write the cropped data to.
direction: output
- type: string
name: --replicate_id
required: false
Expand Down
25 changes: 15 additions & 10 deletions src/data_processors/crop_region/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,33 @@

## VIASH START
par = {
"input": "resources/datasets/10x_xenium/10x_fresh_frozen_mouse_brain_replicates/dataset.zarr",
"output": "output.zarr",
"replicate": ["rep1"],
"min_x": [10000],
"max_x": [12000],
"min_y": [10000],
"max_y": [12000]
"input": "temp/datasets/10x_xenium/2023_10x_mouse_brain_xenium/full_dataset.zarr",
"output": "resources_test/common/2023_10x_mouse_brain_xenium/dataset.zarr",
"replicate_id": ["rep1", "rep2", "rep3"],
"min_x": [10000, 10000, 10000],
"max_x": [12000, 12000, 12000],
"min_y": [10000, 10000, 10000],
"max_y": [12000, 12000, 12000],
}
## VIASH END

# Load the full dataset that contains every replicate.
sdata = sd.read_zarr(par["input"])

# One cropped SpatialData object per requested replicate.
sdata_out = []

for i, replicate_id in enumerate(par["replicate_id"]):
    # The i-th entry of each coordinate list belongs to the i-th replicate.
    min_x = par["min_x"][i]
    max_x = par["max_x"][i]
    min_y = par["min_y"][i]
    max_y = par["max_y"][i]
    # Always query the untouched input object; the bounding box is expressed
    # in this replicate's "<replicate_id>_global" coordinate system.
    sdata_query = sdata.query.bounding_box(
        axes=["x", "y"],
        min_coordinate=[min_x, min_y],
        max_coordinate=[max_x, max_y],
        target_coordinate_system=f"{replicate_id}_global",
    )
    sdata_out.append(sdata_query)

# Merge the per-replicate crops into a single object.
sdata_output = sd.concatenate(sdata_out)

# Write the cropped result, not the uncropped input that was read above.
sdata_output.write(par["output"])
86 changes: 0 additions & 86 deletions src/data_processors/process_dataset/script.py

This file was deleted.

0 comments on commit 9877b68

Please sign in to comment.