From 637ea64eef898eb3fc136aabb80f9b803acb8e66 Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Mon, 8 Sep 2025 16:36:58 +0200 Subject: [PATCH 1/8] bidcell added --- .../bidcell/config.vsh.yaml | 71 + .../bidcell/output/.build.yaml | 0 .../bidcell/output/.config.vsh.yaml | 498 ++++++ .../bidcell/output/bidcell | 1585 +++++++++++++++++ src/methods_segmentation/bidcell/script.py | 318 ++++ 5 files changed, 2472 insertions(+) create mode 100644 src/methods_segmentation/bidcell/config.vsh.yaml create mode 100644 src/methods_segmentation/bidcell/output/.build.yaml create mode 100644 src/methods_segmentation/bidcell/output/.config.vsh.yaml create mode 100755 src/methods_segmentation/bidcell/output/bidcell create mode 100644 src/methods_segmentation/bidcell/script.py diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml new file mode 100644 index 000000000..ddc372737 --- /dev/null +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -0,0 +1,71 @@ +name: bidcell +label: "BIDCell Segmentation" +summary: "Cell segmentation using BIDCell deep learning approach" +description: "BIDCell is a deep learning method for cell segmentation in spatial transcriptomics data that uses both morphology and transcript information." 
+links: + documentation: "https://github.com/SydneyBioX/BIDCell" + repository: "https://github.com/SydneyBioX/BIDCell" +references: + doi: "10.1038/s41467-023-44560-w" + +__merge__: /src/api/comp_method_segmentation.yaml + +arguments: + - name: --single_cell_ref + type: file + description: "Path to single-cell reference data in H5AD format for marker gene identification" + required: false + - name: --max_overlaps_pos + type: integer + default: 4 + description: "Maximum number of cell types that can share a positive marker" + - name: --max_overlaps_neg + type: integer + default: 15 + description: "Maximum number of cell types that can share a negative marker" + - name: --model_epochs + type: integer + default: 10 + description: "Number of training epochs for BIDCell model" + - name: --min_cell_size + type: integer + default: 15 + description: "Minimum cell size in pixels" + +resources: + - type: python_script + path: script.py + +engines: + - type: docker + image: openproblems/base_python:1 + setup: + - type: python + pypi: spatialdata + - type: python + pypi: scanpy + - type: python + pypi: tifffile + - type: python + pypi: opencv-python + - type: python + pypi: natsort + - type: python + pypi: bidcell + - type: python + pypi: dask[dataframe] + - type: python + pypi: spatialdata-plot + - type: python + pypi: pyyaml + - type: python + pypi: scikit-image + __merge__: + - /src/base/setup_txsim_partial.yaml + - type: native + +runners: + - type: executable + - type: nextflow + directives: + label: [ midtime, lowcpu, highmem, gpu ] diff --git a/src/methods_segmentation/bidcell/output/.build.yaml b/src/methods_segmentation/bidcell/output/.build.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/src/methods_segmentation/bidcell/output/.config.vsh.yaml b/src/methods_segmentation/bidcell/output/.config.vsh.yaml new file mode 100644 index 000000000..e0009e04f --- /dev/null +++ b/src/methods_segmentation/bidcell/output/.config.vsh.yaml @@ -0,0 +1,498 @@ +name: 
"bidcell" +namespace: "methods_segmentation" +version: "dev" +argument_groups: +- name: "Arguments" + arguments: + - type: "file" + name: "--input" + label: "Raw iST Dataset" + summary: "A spatial transcriptomics dataset, preprocessed for this benchmark." + description: "This dataset contains preprocessed images, labels, points, shapes,\ + \ and tables for spatial transcriptomics data.\n" + info: + format: + type: "spatialdata_zarr" + images: + - type: "object" + name: "image" + description: "The raw image data" + required: true + - type: "object" + name: "image_3D" + description: "The raw 3D image data" + required: false + - type: "object" + name: "he_image" + description: "H&E image data" + required: false + labels: + - type: "object" + name: "cell_labels" + description: "Cell segmentation labels" + required: false + - type: "object" + name: "nucleus_labels" + description: "Cell segmentation labels" + required: false + points: + - type: "dataframe" + name: "transcripts" + description: "Point cloud data of transcripts" + required: true + columns: + - type: "float" + name: "x" + required: true + description: "x-coordinate of the point" + - type: "float" + name: "y" + required: true + description: "y-coordinate of the point" + - type: "float" + name: "z" + required: false + description: "z-coordinate of the point" + - type: "categorical" + name: "feature_name" + required: true + description: "Name of the feature" + - type: "integer" + name: "cell_id" + required: false + description: "Unique identifier of the cell" + - type: "integer" + name: "nucleus_id" + required: false + description: "Unique identifier of the nucleus" + - type: "string" + name: "cell_type" + required: false + description: "Cell type of the cell" + - type: "float" + name: "qv" + required: false + description: "Quality value of the point" + - type: "long" + name: "transcript_id" + required: true + description: "Unique identifier of the transcript" + - type: "boolean" + name: "overlaps_nucleus" + 
required: false + description: "Whether the point overlaps with a nucleus" + shapes: + - type: "dataframe" + name: "cell_boundaries" + description: "Cell boundaries" + required: false + columns: + - type: "object" + name: "geometry" + required: true + description: "Geometry of the cell boundary" + - type: "dataframe" + name: "nucleus_boundaries" + description: "Nucleus boundaries" + required: false + columns: + - type: "object" + name: "geometry" + required: true + description: "Geometry of the nucleus boundary" + tables: + - type: "anndata" + name: "metadata" + description: "Metadata of spatial dataset" + required: true + uns: + - type: "string" + name: "dataset_id" + required: true + description: "A unique identifier for the dataset" + - type: "string" + name: "dataset_name" + required: true + description: "A human-readable name for the dataset" + - type: "string" + name: "dataset_url" + required: true + description: "Link to the original source of the dataset" + - type: "string" + name: "dataset_reference" + required: true + description: "Bibtex reference of the paper in which the dataset was published" + - type: "string" + name: "dataset_summary" + required: true + description: "Short description of the dataset" + - type: "string" + name: "dataset_description" + required: true + description: "Long description of the dataset" + - type: "string" + name: "dataset_organism" + required: true + description: "The organism of the sample in the dataset" + - type: "string" + name: "segmentation_id" + required: true + multiple: true + description: "A unique identifier for the segmentation" + obs: + - type: "string" + name: "cell_id" + required: true + description: "A unique identifier for the cell" + var: + - type: "string" + name: "gene_ids" + required: true + description: "Unique identifier for the gene" + - type: "string" + name: "feature_types" + required: true + description: "Type of the feature" + obsm: + - type: "double" + name: "spatial" + required: true + 
description: "Spatial coordinates of the cell" + coordinate_systems: + - type: "object" + name: "global" + description: "Coordinate system of the replicate" + required: true + example: + - "resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr" + must_exist: true + create_parent: true + required: true + direction: "input" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--output" + label: "Segmentation" + summary: "A segmentation of a spatial transcriptomics dataset" + description: "This dataset contains a segmentation of the spatial transcriptomics\ + \ data.\n" + info: + format: + type: "spatialdata_zarr" + labels: + - type: "object" + name: "segmentation" + description: "Segmentation of the data" + required: true + tables: + - type: "anndata" + name: "table" + description: "AnnData table" + required: true + obs: + - type: "string" + name: "cell_id" + description: "Cell ID" + required: true + - type: "string" + name: "region" + description: "Region" + required: true + example: + - "resources_test/task_ist_preprocessing/mouse_brain_combined/segmentation.zarr" + must_exist: true + create_parent: true + required: true + direction: "output" + multiple: false + multiple_sep: ";" + - type: "file" + name: "--single_cell_ref" + description: "Path to single-cell reference data in H5AD format for marker gene\ + \ identification" + info: null + default: + - "task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" + must_exist: true + create_parent: true + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_overlaps_pos" + description: "Maximum number of cell types that can share a positive marker" + info: null + default: + - 4 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--max_overlaps_neg" + description: "Maximum number of cell types that can share a negative marker" + info: null + 
default: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--model_epochs" + description: "Number of training epochs for BIDCell model" + info: null + default: + - 10 + required: false + direction: "input" + multiple: false + multiple_sep: ";" + - type: "integer" + name: "--min_cell_size" + description: "Minimum cell size in pixels" + info: null + default: + - 15 + required: false + direction: "input" + multiple: false + multiple_sep: ";" +resources: +- type: "python_script" + path: "script.py" + is_executable: true +label: "BIDCell Segmentation" +summary: "Cell segmentation using BIDCell deep learning approach" +description: "BIDCell is a deep learning method for cell segmentation in spatial transcriptomics\ + \ data that uses both morphology and transcript information." +test_resources: +- type: "file" + path: "resources_test/task_ist_preprocessing/mouse_brain_combined" + dest: "resources_test/task_ist_preprocessing/mouse_brain_combined" +- type: "python_script" + path: "run_and_check_output.py" + is_executable: true +- type: "python_script" + path: "check_config.py" + is_executable: true +info: + type: "method" + subtype: "method_segmentation" + type_info: + label: "Segmentation" + summary: "A segmentation of the spatial data into cells" + description: "A segmentation method uses the spatial data to segment an image\ + \ into cells." 
+status: "enabled" +scope: + image: "public" + target: "public" +repositories: +- type: "github" + name: "openproblems" + repo: "openproblems-bio/openproblems" + tag: "build/main" +- type: "github" + name: "datasets" + repo: "openproblems-bio/datasets" + tag: "build/main" +license: "MIT" +references: + doi: + - "10.1038/s41467-023-44560-w" +links: + repository: "https://github.com/SydneyBioX/BIDCell" + docker_registry: "ghcr.io" + documentation: "https://github.com/SydneyBioX/BIDCell" +runners: +- type: "executable" + id: "executable" + docker_setup_strategy: "ifneedbepullelsecachedbuild" +- type: "nextflow" + id: "nextflow" + directives: + label: + - "midtime" + - "lowcpu" + - "highmem" + - "gpu" + tag: "$id" + auto: + simplifyInput: true + simplifyOutput: false + transcript: false + publish: false + config: + labels: + lowmem: "memory = 20.Gb" + midmem: "memory = 50.Gb" + highmem: "memory = 100.Gb" + lowcpu: "cpus = 5" + midcpu: "cpus = 15" + highcpu: "cpus = 30" + lowtime: "time = 1.h" + midtime: "time = 4.h" + hightime: "time = 8.h" + veryhightime: "time = 24.h" + debug: false + container: "docker" +engines: +- type: "docker" + id: "docker" + image: "openproblems/base_python:1" + namespace_separator: "/" + setup: + - type: "python" + user: false + pypi: + - "squidpy" + - "rasterio" + github: + - "theislab/txsim@dev" + upgrade: true + - type: "python" + user: false + pypi: + - "spatialdata" + upgrade: true + - type: "python" + user: false + pypi: + - "scanpy" + upgrade: true + - type: "python" + user: false + pypi: + - "tifffile" + upgrade: true + - type: "python" + user: false + pypi: + - "opencv-python" + upgrade: true + - type: "python" + user: false + pypi: + - "natsort" + upgrade: true + - type: "python" + user: false + pypi: + - "bidcell" + upgrade: true + - type: "python" + user: false + pypi: + - "dask[dataframe]" + upgrade: true + - type: "python" + user: false + pypi: + - "spatialdata-plot" + upgrade: true + - type: "python" + user: false + pypi: + - 
"pyyaml" + upgrade: true + - type: "python" + user: false + pypi: + - "scikit-image" + upgrade: true + entrypoint: [] + cmd: null +- type: "native" + id: "native" +build_info: + config: "src/methods_segmentation/bidcell/config.vsh.yaml" + runner: "executable" + engine: "docker|native" + output: "output" + executable: "output/bidcell" + viash_version: "0.9.4" + git_commit: "16c6e21ef81efdd062143cad6b79cf992167e1c5" + git_remote: "https://github.com/openproblems-bio/task_ist_preprocessing.git" +package_config: + name: "task_ist_preprocessing" + version: "dev" + label: "iST Preprocessing" + summary: "Benchmarking approaches for preprocessing imaging-based spatial transcriptomics" + description: "Provide a clear and concise description of your task, detailing the\ + \ specific problem it aims\nto solve. Outline the input data types, the expected\ + \ output, and any assumptions or constraints.\nBe sure to explain any terminology\ + \ or concepts that are essential for understanding the task.\n\nExplain the motivation\ + \ behind your proposed task. Describe the biological or computational\nproblem\ + \ you aim to address and why it's important. Discuss the current state of research\ + \ in\nthis area and any gaps or challenges that your task could help address.\ + \ This section\nshould convince readers of the significance and relevance of your\ + \ task.\n" + info: + image: "The name of the image file to use for the component on the website." 
+ test_resources: + - type: "s3" + path: "s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium_rep1/" + dest: "resources_test/common/2023_10x_mouse_brain_xenium_rep1/" + - type: "s3" + path: "s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/" + dest: "resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/" + - type: "s3" + path: "s3://openproblems-data/resources_test/task_ist_preprocessing/" + dest: "resources_test/task_ist_preprocessing" + repositories: + - type: "github" + name: "openproblems" + repo: "openproblems-bio/openproblems" + tag: "build/main" + - type: "github" + name: "datasets" + repo: "openproblems-bio/datasets" + tag: "build/main" + viash_version: "0.9.4" + source: "src" + target: "target" + config_mods: + - ".runners[.type == \"nextflow\"].config.labels := { lowmem : \"memory = 20.Gb\"\ + , midmem : \"memory = 50.Gb\", highmem : \"memory = 100.Gb\", lowcpu : \"cpus\ + \ = 5\", midcpu : \"cpus = 15\", highcpu : \"cpus = 30\", lowtime : \"time = 1.h\"\ + , midtime : \"time = 4.h\", hightime : \"time = 8.h\", veryhightime : \"time =\ + \ 24.h\" }\n" + authors: + - name: "Louis Kümmerle" + roles: + - "author" + - "maintainer" + info: + github: "LouisK92" + orcid: "0000-0002-9193-1243" + - name: "Malte D. 
Luecken" + roles: + - "author" + info: + github: "LuckyMD" + orcid: "0000-0001-7464-7921" + - name: "Daniel Strobl" + roles: + - "author" + info: + github: "danielStrobl" + orcid: "0000-0002-5516-7057" + - name: "Robrecht Cannoodt" + roles: + - "author" + info: + github: "rcannood" + orcid: "0000-0003-3641-729X" + keywords: + - "spatial transcriptomics" + - "imaging-based spatial transcriptomics" + - "preprocessing" + license: "MIT" + organization: "openproblems-bio" + links: + repository: "https://github.com/openproblems-bio/task_ist_preprocessing" + docker_registry: "ghcr.io" + issue_tracker: "https://github.com/openproblems-bio/task_ist_preprocessing/issues" diff --git a/src/methods_segmentation/bidcell/output/bidcell b/src/methods_segmentation/bidcell/output/bidcell new file mode 100755 index 000000000..f84745748 --- /dev/null +++ b/src/methods_segmentation/bidcell/output/bidcell @@ -0,0 +1,1585 @@ +#!/usr/bin/env bash + +# bidcell dev +# +# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative +# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data +# Intuitive. +# +# The component may contain files which fall under a different license. The +# authors of this component should specify the license in the header of such +# files, or include a separate license file detailing the licenses of all included +# files. 
+ +set -e + +if [ -z "$VIASH_TEMP" ]; then + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} + VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TMP} + VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} + VIASH_TEMP=${VIASH_TEMP:-$TEMP} + VIASH_TEMP=${VIASH_TEMP:-/tmp} +fi + +# define helper functions +# ViashQuote: put quotes around non flag values +# $1 : unquoted string +# return : possibly quoted string +# examples: +# ViashQuote --foo # returns --foo +# ViashQuote bar # returns 'bar' +# Viashquote --foo=bar # returns --foo='bar' +function ViashQuote { + if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then + echo "$1" | sed "s#=\(.*\)#='\1'#" + elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then + echo "$1" + else + echo "'$1'" + fi +} +# ViashRemoveFlags: Remove leading flag +# $1 : string with a possible leading flag +# return : string without possible leading flag +# examples: +# ViashRemoveFlags --foo=bar # returns bar +function ViashRemoveFlags { + echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' +} +# ViashSourceDir: return the path of a bash file, following symlinks +# usage : ViashSourceDir ${BASH_SOURCE[0]} +# $1 : Should always be set to ${BASH_SOURCE[0]} +# returns : The absolute path of the bash file +function ViashSourceDir { + local source="$1" + while [ -h "$source" ]; do + local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" + source="$(readlink "$source")" + [[ $source != /* ]] && source="$dir/$source" + done + cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd +} +# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks +# usage : ViashFindTargetDir 'ScriptPath' +# $1 : The location from where to start the upward search +# returns : The absolute path of the '.build.yaml' file +function ViashFindTargetDir { + local source="$1" + while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do + source=${source%/*} + done + echo $source +} +# see https://en.wikipedia.org/wiki/Syslog#Severity_level +VIASH_LOGCODE_EMERGENCY=0 +VIASH_LOGCODE_ALERT=1 +VIASH_LOGCODE_CRITICAL=2 +VIASH_LOGCODE_ERROR=3 +VIASH_LOGCODE_WARNING=4 +VIASH_LOGCODE_NOTICE=5 +VIASH_LOGCODE_INFO=6 +VIASH_LOGCODE_DEBUG=7 +VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE + +# ViashLog: Log events depending on the verbosity level +# usage: ViashLog 1 alert Oh no something went wrong! +# $1: required verbosity level +# $2: display tag +# $3+: messages to display +# stdout: Your input, prepended by '[$2] '. +function ViashLog { + local required_level="$1" + local display_tag="$2" + shift 2 + if [ $VIASH_VERBOSITY -ge $required_level ]; then + >&2 echo "[$display_tag]" "$@" + fi +} + +# ViashEmergency: log events when the system is unstable +# usage: ViashEmergency Oh no something went wrong. +# stdout: Your input, prepended by '[emergency] '. +function ViashEmergency { + ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" +} + +# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) +# usage: ViashAlert Oh no something went wrong. +# stdout: Your input, prepended by '[alert] '. +function ViashAlert { + ViashLog $VIASH_LOGCODE_ALERT alert "$@" +} + +# ViashCritical: log events when a critical condition occurs +# usage: ViashCritical Oh no something went wrong. +# stdout: Your input, prepended by '[critical] '. +function ViashCritical { + ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" +} + +# ViashError: log events when an error condition occurs +# usage: ViashError Oh no something went wrong. +# stdout: Your input, prepended by '[error] '. +function ViashError { + ViashLog $VIASH_LOGCODE_ERROR error "$@" +} + +# ViashWarning: log potentially abnormal events +# usage: ViashWarning Something may have gone wrong. +# stdout: Your input, prepended by '[warning] '. 
+function ViashWarning { + ViashLog $VIASH_LOGCODE_WARNING warning "$@" +} + +# ViashNotice: log significant but normal events +# usage: ViashNotice This just happened. +# stdout: Your input, prepended by '[notice] '. +function ViashNotice { + ViashLog $VIASH_LOGCODE_NOTICE notice "$@" +} + +# ViashInfo: log normal events +# usage: ViashInfo This just happened. +# stdout: Your input, prepended by '[info] '. +function ViashInfo { + ViashLog $VIASH_LOGCODE_INFO info "$@" +} + +# ViashDebug: log all events, for debugging purposes +# usage: ViashDebug This just happened. +# stdout: Your input, prepended by '[debug] '. +function ViashDebug { + ViashLog $VIASH_LOGCODE_DEBUG debug "$@" +} + +# find source folder of this component +VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` + +# find the root of the built components & dependencies +VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` + +# define meta fields +VIASH_META_NAME="bidcell" +VIASH_META_FUNCTIONALITY_NAME="bidcell" +VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" +VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" +VIASH_META_TEMP_DIR="$VIASH_TEMP" + + + +# initialise variables +VIASH_MODE='run' +VIASH_ENGINE_ID='docker' + +######## Helper functions for setting up Docker images for viash ######## +# expects: ViashDockerBuild + +# ViashDockerInstallationCheck: check whether Docker is installed correctly +# +# examples: +# ViashDockerInstallationCheck +function ViashDockerInstallationCheck { + ViashDebug "Checking whether Docker is installed" + if [ ! command -v docker &> /dev/null ]; then + ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." + exit 1 + fi + + ViashDebug "Checking whether the Docker daemon is running" + local save=$-; set +e + local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) + local out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashCritical "Docker daemon does not seem to be running. Try one of the following:" + ViashCritical "- Try running 'dockerd' in the command line" + ViashCritical "- See https://docs.docker.com/config/daemon/" + exit 1 + fi +} + +# ViashDockerRemoteTagCheck: check whether a Docker image is available +# on a remote. Assumes `docker login` has been performed, if relevant. +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerRemoteTagCheck python:latest +# echo $? # returns '0' +# ViashDockerRemoteTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerRemoteTagCheck { + docker manifest inspect $1 > /dev/null 2> /dev/null +} + +# ViashDockerLocalTagCheck: check whether a Docker image is available locally +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# docker pull python:latest +# ViashDockerLocalTagCheck python:latest +# echo $? # returns '0' +# ViashDockerLocalTagCheck sdaizudceahifu +# echo $? # returns '1' +function ViashDockerLocalTagCheck { + [ -n "$(docker images -q $1)" ] +} + +# ViashDockerPull: pull a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPull python:latest +# echo $? # returns '0' +# ViashDockerPull sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPull { + ViashNotice "Checking if Docker image is available at '$1'" + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker pull $1 && return 0 || return 1 + else + local save=$-; set +e + docker pull $1 2> /dev/null > /dev/null + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
+ fi + return $out + fi +} + +# ViashDockerPush: push a Docker image +# +# $1 : image identifier with format `[registry/]image[:tag]` +# exit code $? : whether or not the image was found +# examples: +# ViashDockerPush python:latest +# echo $? # returns '0' +# ViashDockerPush sdaizudceahifu +# echo $? # returns '1' +function ViashDockerPush { + ViashNotice "Pushing image to '$1'" + local save=$-; set +e + local out + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + docker push $1 + out=$? + else + docker push $1 2> /dev/null > /dev/null + out=$? + fi + [[ $save =~ e ]] && set -e + if [ $out -eq 0 ]; then + ViashNotice "Container '$1' push succeeded." + else + ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." + fi + return $out +} + +# ViashDockerPullElseBuild: pull a Docker image, else build it +# +# $1 : image identifier with format `[registry/]image[:tag]` +# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. +# examples: +# ViashDockerPullElseBuild mynewcomponent +function ViashDockerPullElseBuild { + local save=$-; set +e + ViashDockerPull $1 + local out=$? 
+ [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashDockerBuild $@ + fi +} + +# ViashDockerSetup: create a Docker image, according to specified docker setup strategy +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $2 : docker setup strategy, see DockerSetupStrategy.scala +# examples: +# ViashDockerSetup mynewcomponent alwaysbuild +function ViashDockerSetup { + local image_id="$1" + local setup_strategy="$2" + if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then + local save=$-; set +e + ViashDockerLocalTagCheck $image_id + local outCheck=$? 
+ [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashInfo "Image $image_id already exists" + elif [ "$setup_strategy" == "ifneedbebuild" ]; then + ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then + ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepull" ]; then + ViashDockerPull $image_id + elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then + ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") + elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then + ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") + else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi + elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then + ViashDockerPush "$image_id" + elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then + local save=$-; set +e + ViashDockerRemoteTagCheck $image_id + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -eq 0 ]; then + ViashNotice "Container '$image_id' exists, doing nothing." + else + ViashNotice "Container '$image_id' does not yet exist." + ViashDockerPush "$image_id" + fi + elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then + ViashNotice "Skipping setup." 
+ else + ViashError "Unrecognised Docker strategy: $setup_strategy" + exit 1 + fi +} + +# ViashDockerCheckCommands: Check whether a docker container has the required commands +# +# $1 : image identifier with format `[registry/]image[:tag]` +# $@ : commands to verify being present +# examples: +# ViashDockerCheckCommands bash:4.0 bash ps foo +function ViashDockerCheckCommands { + local image_id="$1" + shift 1 + local commands="$@" + local save=$-; set +e + local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' + missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") + local outCheck=$? + [[ $save =~ e ]] && set -e + if [ $outCheck -ne 0 ]; then + ViashError "Docker container '$image_id' does not contain command '$missing'." + exit 1 + fi +} + +# ViashDockerBuild: build a docker image +# $1 : image identifier with format `[registry/]image[:tag]` +# $... : additional arguments to pass to docker build +# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in +# $VIASH_META_NAME : name of the component +# $VIASH_META_RESOURCES_DIR : directory containing the resources +# $VIASH_VERBOSITY : verbosity level +# exit code $? 
: whether or not the image was built successfully +function ViashDockerBuild { + local image_id="$1" + shift 1 + + # create temporary directory to store dockerfile & optional resources in + local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") + local dockerfile="$tmpdir/Dockerfile" + function clean_up { + rm -rf "$tmpdir" + } + trap clean_up EXIT + + # store dockerfile and resources + ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" + + # generate the build command + local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" + + # build the container + ViashNotice "Building container '$image_id' with Dockerfile" + ViashInfo "$docker_build_cmd" + local save=$-; set +e + if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then + eval $docker_build_cmd + else + eval $docker_build_cmd &> "$tmpdir/docker_build.log" + fi + + # check exit code + local out=$? + [[ $save =~ e ]] && set -e + if [ $out -ne 0 ]; then + ViashError "Error occurred while building container '$image_id'" + if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then + ViashError "Transcript: --------------------------------" + cat "$tmpdir/docker_build.log" + ViashError "End of transcript --------------------------" + fi + exit 1 + fi +} + +######## End of helper functions for setting up Docker images for viash ######## + +# ViashDockerFile: print the dockerfile to stdout +# $1 : engine identifier +# return : dockerfile required to run this component +# examples: +# ViashDockerFile +function ViashDockerfile { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + cat << 'VIASHDOCKER' +FROM openproblems/base_python:1 +ENTRYPOINT [] +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "squidpy" "rasterio" && \ + pip install --upgrade --no-cache-dir "git+https://github.com/theislab/txsim@dev" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "spatialdata" + +RUN pip install 
--upgrade pip && \ + pip install --upgrade --no-cache-dir "scanpy" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "tifffile" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "opencv-python" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "natsort" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "bidcell" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "dask[dataframe]" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "spatialdata-plot" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "pyyaml" + +RUN pip install --upgrade pip && \ + pip install --upgrade --no-cache-dir "scikit-image" + +LABEL org.opencontainers.image.description="Companion container for running component methods_segmentation bidcell" +LABEL org.opencontainers.image.created="2025-09-08T16:27:18+02:00" +LABEL org.opencontainers.image.source="https://github.com/SydneyBioX/BIDCell" +LABEL org.opencontainers.image.revision="16c6e21ef81efdd062143cad6b79cf992167e1c5" +LABEL org.opencontainers.image.version="dev" + +VIASHDOCKER + fi +} + +# ViashDockerBuildArgs: return the arguments to pass to docker build +# $1 : engine identifier +# return : arguments to pass to docker build +function ViashDockerBuildArgs { + local engine_id="$1" + + if [[ "$engine_id" == "docker" ]]; then + echo "" + fi +} + +# ViashAbsolutePath: generate absolute path from relative path +# borrowed from https://stackoverflow.com/a/21951256 +# $1 : relative filename +# return : absolute path +# examples: +# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt +# ViashAbsolutePath /foo/bar/.. # returns /foo +function ViashAbsolutePath { + local thePath + local parr + local outp + local len + if [[ ! 
"$1" =~ ^/ ]]; then + thePath="$PWD/$1" + else + thePath="$1" + fi + echo "$thePath" | ( + IFS=/ + read -a parr + declare -a outp + for i in "${parr[@]}"; do + case "$i" in + ''|.) continue ;; + ..) + len=${#outp[@]} + if ((len==0)); then + continue + else + unset outp[$((len-1))] + fi + ;; + *) + len=${#outp[@]} + outp[$len]="$i" + ;; + esac + done + echo /"${outp[*]}" + ) +} +# ViashDockerAutodetectMount: auto configuring docker mounts from parameters +# $1 : The parameter value +# returns : New parameter +# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker +# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts +# examples: +# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' +# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' +function ViashDockerAutodetectMount { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + if [ -z "$base_name" ]; then + echo "$mount_target" + else + echo "$mount_target/$base_name" + fi +} +function ViashDockerAutodetectMountArg { + local abs_path=$(ViashAbsolutePath "$1") + local mount_source + local base_name + if [ -d "$abs_path" ]; then + mount_source="$abs_path" + base_name="" + else + mount_source=`dirname "$abs_path"` + base_name=`basename "$abs_path"` + fi + local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" + ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" + echo "--volume=\"$mount_source:$mount_target\"" +} +function ViashDockerStripAutomount { + local abs_path=$(ViashAbsolutePath "$1") + echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" +} +# initialise variables +VIASH_DIRECTORY_MOUNTS=() + +# configure 
default docker automount prefix if it is unset +if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then + VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" +fi + +# initialise docker variables +VIASH_DOCKER_RUN_ARGS=(-i --rm) + + +# ViashHelp: Display helpful explanation about this executable +function ViashHelp { + echo "bidcell dev" + echo "" + echo "BIDCell is a deep learning method for cell segmentation in spatial" + echo "transcriptomics data that uses both morphology and transcript information." + echo "" + echo "Arguments:" + echo " --input" + echo " type: file, required parameter, file must exist" + echo " example:" + echo "resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr" + echo " This dataset contains preprocessed images, labels, points, shapes, and" + echo " tables for spatial transcriptomics data." + echo "" + echo " --output" + echo " type: file, required parameter, output, file must exist" + echo " example:" + echo "resources_test/task_ist_preprocessing/mouse_brain_combined/segmentation.zarr" + echo " This dataset contains a segmentation of the spatial transcriptomics" + echo " data." 
+ echo "" + echo " --single_cell_ref" + echo " type: file, file must exist" + echo " default:" + echo "task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" + echo " Path to single-cell reference data in H5AD format for marker gene" + echo " identification" + echo "" + echo " --max_overlaps_pos" + echo " type: integer" + echo " default: 4" + echo " Maximum number of cell types that can share a positive marker" + echo "" + echo " --max_overlaps_neg" + echo " type: integer" + echo " default: 15" + echo " Maximum number of cell types that can share a negative marker" + echo "" + echo " --model_epochs" + echo " type: integer" + echo " default: 10" + echo " Number of training epochs for BIDCell model" + echo "" + echo " --min_cell_size" + echo " type: integer" + echo " default: 15" + echo " Minimum cell size in pixels" + echo "" + echo "Viash built in Computational Requirements:" + echo " ---cpus=INT" + echo " Number of CPUs to use" + echo " ---memory=STRING" + echo " Amount of memory to use. Examples: 4GB, 3MiB." + echo "" + echo "Viash built in Docker:" + echo " ---setup=STRATEGY" + echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." + echo " Default: ifneedbepullelsecachedbuild" + echo " ---dockerfile" + echo " Print the dockerfile to stdout." + echo " ---docker_run_args=ARG" + echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." + echo " ---docker_image_id" + echo " Print the docker image id to stdout." + echo " ---debug" + echo " Enter the docker container for debugging purposes." + echo "" + echo "Viash built in Engines:" + echo " ---engine=ENGINE_ID" + echo " Specify the engine to use. Options are: docker, native." 
+ echo " Default: docker" +} + +# initialise array +VIASH_POSITIONAL_ARGS='' + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) + ViashHelp + exit + ;; + ---v|---verbose) + let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" + shift 1 + ;; + ---verbosity) + VIASH_VERBOSITY="$2" + shift 2 + ;; + ---verbosity=*) + VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + --version) + echo "bidcell dev" + exit + ;; + --input) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --input=*) + [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --output) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --output=*) + [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") + shift 1 + ;; + --single_cell_ref) + [ -n "$VIASH_PAR_SINGLE_CELL_REF" ] && ViashError Bad arguments for option \'--single_cell_ref\': \'$VIASH_PAR_SINGLE_CELL_REF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SINGLE_CELL_REF="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --single_cell_ref. 
Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --single_cell_ref=*) + [ -n "$VIASH_PAR_SINGLE_CELL_REF" ] && ViashError Bad arguments for option \'--single_cell_ref=*\': \'$VIASH_PAR_SINGLE_CELL_REF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_SINGLE_CELL_REF=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_overlaps_pos) + [ -n "$VIASH_PAR_MAX_OVERLAPS_POS" ] && ViashError Bad arguments for option \'--max_overlaps_pos\': \'$VIASH_PAR_MAX_OVERLAPS_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_OVERLAPS_POS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_overlaps_pos. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_overlaps_pos=*) + [ -n "$VIASH_PAR_MAX_OVERLAPS_POS" ] && ViashError Bad arguments for option \'--max_overlaps_pos=*\': \'$VIASH_PAR_MAX_OVERLAPS_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_OVERLAPS_POS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --max_overlaps_neg) + [ -n "$VIASH_PAR_MAX_OVERLAPS_NEG" ] && ViashError Bad arguments for option \'--max_overlaps_neg\': \'$VIASH_PAR_MAX_OVERLAPS_NEG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MAX_OVERLAPS_NEG="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_overlaps_neg. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --max_overlaps_neg=*) + [ -n "$VIASH_PAR_MAX_OVERLAPS_NEG" ] && ViashError Bad arguments for option \'--max_overlaps_neg=*\': \'$VIASH_PAR_MAX_OVERLAPS_NEG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MAX_OVERLAPS_NEG=$(ViashRemoveFlags "$1") + shift 1 + ;; + --model_epochs) + [ -n "$VIASH_PAR_MODEL_EPOCHS" ] && ViashError Bad arguments for option \'--model_epochs\': \'$VIASH_PAR_MODEL_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_EPOCHS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_epochs. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --model_epochs=*) + [ -n "$VIASH_PAR_MODEL_EPOCHS" ] && ViashError Bad arguments for option \'--model_epochs=*\': \'$VIASH_PAR_MODEL_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MODEL_EPOCHS=$(ViashRemoveFlags "$1") + shift 1 + ;; + --min_cell_size) + [ -n "$VIASH_PAR_MIN_CELL_SIZE" ] && ViashError Bad arguments for option \'--min_cell_size\': \'$VIASH_PAR_MIN_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_PAR_MIN_CELL_SIZE="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cell_size. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + --min_cell_size=*) + [ -n "$VIASH_PAR_MIN_CELL_SIZE" ] && ViashError Bad arguments for option \'--min_cell_size=*\': \'$VIASH_PAR_MIN_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_PAR_MIN_CELL_SIZE=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---engine) + VIASH_ENGINE_ID="$2" + shift 2 + ;; + ---engine=*) + VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---setup) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$2" + shift 2 + ;; + ---setup=*) + VIASH_MODE='setup' + VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" + shift 1 + ;; + ---dockerfile) + VIASH_MODE='dockerfile' + shift 1 + ;; + ---docker_run_args) + VIASH_DOCKER_RUN_ARGS+=("$2") + shift 2 + ;; + ---docker_run_args=*) + VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") + shift 1 + ;; + ---docker_image_id) + VIASH_MODE='docker_image_id' + shift 1 + ;; + ---debug) + VIASH_MODE='debug' + shift 1 + ;; + ---cpus) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---cpus=*) + [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_CPUS=$(ViashRemoveFlags "$1") + shift 1 + ;; + ---memory) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 + VIASH_META_MEMORY="$2" + [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 + shift 2 + ;; + ---memory=*) + [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 + VIASH_META_MEMORY=$(ViashRemoveFlags "$1") + shift 1 + ;; + *) # positional arg or unknown option + # since the positional args will be eval'd, can we always quote, instead of using ViashQuote + VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" + [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. + shift # past argument + ;; + esac +done + +# parse positional parameters +eval set -- $VIASH_POSITIONAL_ARGS + + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + VIASH_ENGINE_TYPE='native' +elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then + VIASH_ENGINE_TYPE='docker' +else + ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." + exit 1 +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # check if docker is installed properly + ViashDockerInstallationCheck + + # determine docker image id + if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then + VIASH_DOCKER_IMAGE_ID='ghcr.io/openproblems-bio/task_ist_preprocessing/methods_segmentation/bidcell:dev' + fi + + # print dockerfile + if [ "$VIASH_MODE" == "dockerfile" ]; then + ViashDockerfile "$VIASH_ENGINE_ID" + exit 0 + + elif [ "$VIASH_MODE" == "docker_image_id" ]; then + echo "$VIASH_DOCKER_IMAGE_ID" + exit 0 + + # enter docker container + elif [[ "$VIASH_MODE" == "debug" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" + ViashNotice "+ $VIASH_CMD" + eval $VIASH_CMD + exit + + # build docker image + elif [ "$VIASH_MODE" == "setup" ]; then + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' + exit 0 + fi + + # check if docker image exists + ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild + ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' +fi + +# 
setting computational defaults + +# helper function for parsing memory strings +function ViashMemoryAsBytes { + local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` + local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' + if [[ $memory =~ $memory_regex ]]; then + local number=${memory/[^0-9]*/} + local symbol=${memory/*[0-9]/} + + case $symbol in + b) memory_b=$number ;; + kb|k) memory_b=$(( $number * 1000 )) ;; + mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; + gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; + tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; + pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; + kib|ki) memory_b=$(( $number * 1024 )) ;; + mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; + gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; + tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; + pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; + esac + echo "$memory_b" + fi +} +# compute memory in different units +if [ ! -z ${VIASH_META_MEMORY+x} ]; then + VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` + # do not define other variables if memory_b is an empty string + if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then + VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) + VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) + VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) + VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) + VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) + VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) + VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) + VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) + VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) + VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) + else + # unset memory if string is empty + unset $VIASH_META_MEMORY_B + fi +fi +# unset nproc if string is empty +if [ -z "$VIASH_META_CPUS" ]; then + unset $VIASH_META_CPUS +fi + + +# check whether required parameters exist +if [ -z ${VIASH_PAR_INPUT+x} ]; then + ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_PAR_OUTPUT+x} ]; then + ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_NAME+x} ]; then + ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then + ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then + ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_EXECUTABLE+x} ]; then + ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_CONFIG+x} ]; then + ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. + exit 1 +fi +if [ -z ${VIASH_META_TEMP_DIR+x} ]; then + ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. + exit 1 +fi + +# filling in defaults +if [ -z ${VIASH_PAR_SINGLE_CELL_REF+x} ]; then + VIASH_PAR_SINGLE_CELL_REF="task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" +fi +if [ -z ${VIASH_PAR_MAX_OVERLAPS_POS+x} ]; then + VIASH_PAR_MAX_OVERLAPS_POS="4" +fi +if [ -z ${VIASH_PAR_MAX_OVERLAPS_NEG+x} ]; then + VIASH_PAR_MAX_OVERLAPS_NEG="15" +fi +if [ -z ${VIASH_PAR_MODEL_EPOCHS+x} ]; then + VIASH_PAR_MODEL_EPOCHS="10" +fi +if [ -z ${VIASH_PAR_MIN_CELL_SIZE+x} ]; then + VIASH_PAR_MIN_CELL_SIZE="15" +fi + +# check whether required files exist +if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then + ViashError "Input file '$VIASH_PAR_INPUT' does not exist." + exit 1 +fi +if [ ! -z "$VIASH_PAR_SINGLE_CELL_REF" ] && [ ! -e "$VIASH_PAR_SINGLE_CELL_REF" ]; then + ViashError "Input file '$VIASH_PAR_SINGLE_CELL_REF' does not exist." + exit 1 +fi + +# check whether parameters values are of the right type +if [[ -n "$VIASH_PAR_MAX_OVERLAPS_POS" ]]; then + if ! [[ "$VIASH_PAR_MAX_OVERLAPS_POS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_overlaps_pos' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MAX_OVERLAPS_NEG" ]]; then + if ! [[ "$VIASH_PAR_MAX_OVERLAPS_NEG" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--max_overlaps_neg' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MODEL_EPOCHS" ]]; then + if ! [[ "$VIASH_PAR_MODEL_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--model_epochs' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_PAR_MIN_CELL_SIZE" ]]; then + if ! 
[[ "$VIASH_PAR_MIN_CELL_SIZE" =~ ^[-+]?[0-9]+$ ]]; then + ViashError '--min_cell_size' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_CPUS" ]]; then + if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_B" ]]; then + if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MB" ]]; then + if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then + if ! 
[[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi +if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then + if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then + ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. + exit 1 + fi +fi + +# create parent directories of output files, if so desired +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then + mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" +fi + +if [ "$VIASH_ENGINE_ID" == "native" ] ; then + if [ "$VIASH_MODE" == "run" ]; then + VIASH_CMD="bash" + else + ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." + exit 1 + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # detect volumes from file arguments + VIASH_CHOWN_VARS=() +if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) + VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") +fi +if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) + VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") + VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) +fi +if [ ! 
-z "$VIASH_PAR_SINGLE_CELL_REF" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SINGLE_CELL_REF")" ) + VIASH_PAR_SINGLE_CELL_REF=$(ViashDockerAutodetectMount "$VIASH_PAR_SINGLE_CELL_REF") +fi +if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) + VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") +fi +if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) + VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") +fi +if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) + VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") +fi +if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) + VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") +fi + + # get unique mounts + VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # change file ownership + function ViashPerformChown { + if (( ${#VIASH_CHOWN_VARS[@]} )); then + set +e + VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" + ViashDebug "+ $VIASH_CMD" + eval $VIASH_CMD + set -e + fi + } + trap ViashPerformChown EXIT +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # helper function for filling in extra docker args + if [ ! -z "$VIASH_META_MEMORY_B" ]; then + VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") + fi + if [ ! 
-z "$VIASH_META_CPUS" ]; then + VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") + fi +fi + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" +fi + + +# set dependency paths + + +ViashDebug "Running command: $(echo $VIASH_CMD)" +cat << VIASHEOF | eval $VIASH_CMD +set -e +tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bidcell-XXXXXX").py +function clean_up { + rm "\$tempscript" +} +function interrupt { + echo -e "\nCTRL-C Pressed..." + exit 1 +} +trap clean_up EXIT +trap interrupt INT SIGINT +cat > "\$tempscript" << 'VIASHMAIN' +#!/usr/bin/env python3 + +import spatialdata as sd +import anndata as ad +import numpy as np +import pandas as pd +import scanpy as sc +import tifffile +import cv2 +import natsort +import os +import tempfile +import shutil +import yaml +import logging +import sys +from pathlib import Path + +## VIASH START +# The following code has been auto-generated by Viash. +par = { + 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'single_cell_ref': $( if [ ! -z ${VIASH_PAR_SINGLE_CELL_REF+x} ]; then echo "r'${VIASH_PAR_SINGLE_CELL_REF//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'max_overlaps_pos': $( if [ ! -z ${VIASH_PAR_MAX_OVERLAPS_POS+x} ]; then echo "int(r'${VIASH_PAR_MAX_OVERLAPS_POS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'max_overlaps_neg': $( if [ ! -z ${VIASH_PAR_MAX_OVERLAPS_NEG+x} ]; then echo "int(r'${VIASH_PAR_MAX_OVERLAPS_NEG//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'model_epochs': $( if [ ! -z ${VIASH_PAR_MODEL_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MODEL_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'min_cell_size': $( if [ ! 
-z ${VIASH_PAR_MIN_CELL_SIZE+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELL_SIZE//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +meta = { + 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), + 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_kib': $( if [ ! 
-z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), + 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\'/\'\"\'\"r\'}')"; else echo None; fi ) +} +dep = { + +} + +## VIASH END + +def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): + """Generate positive and negative marker genes from single-cell reference.""" + n_genes = ref_df.shape[1] - 3 # Exclude ct_idx, cell_type, atlas columns + cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) + n_cell_types = len(cell_types) + + ref_expr = ref_df.iloc[:, :n_genes].to_numpy() + gene_names = ref_df.columns[:n_genes] + ct_idx = ref_df["ct_idx"].to_numpy() + + # Generate negative markers (genes with low expression in specific cell types) + pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) + pct_10 = np.tile(pct_10, (1, n_genes)) + low_expr_true = np.zeros(pct_10.shape) + low_expr_true[ref_expr <= pct_10] = 1 + + low_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + low_expr_true_ct = low_expr_true[rows] + low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) + + overlaps = np.sum(low_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_neg)[0] + low_expr_true_agg[:, too_many] = 0 + df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) + + # Generate positive markers (genes with high expression in specific cell types) + pct_90 = 
np.percentile(ref_expr, 90, axis=1, keepdims=True) + pct_90 = np.tile(pct_90, (1, n_genes)) + high_expr_true = np.zeros(pct_90.shape) + high_expr_true[ref_expr >= pct_90] = 1 + + high_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + high_expr_true_ct = high_expr_true[rows] + high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) + + overlaps = np.sum(high_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_pos)[0] + high_expr_true_agg[:, too_many] = 0 + df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) + + return df_pos, df_neg + +def create_bidcell_config(work_dir, epochs=10, min_size=15): + """Create BIDCell configuration YAML file.""" + config = { + "data_path": str(work_dir), + "image_name": "morphology_mip_pyramidal.tiff", + "transcript_file": "transcript.csv.gz", + "pos_marker_file": "pos_marker.csv", + "neg_marker_file": "neg_marker.csv", + "scref_file": "scref.csv", + "output_path": str(work_dir), + "model_params": { + "epochs": epochs, + "min_cell_size": min_size + } + } + + config_path = work_dir / "config.yaml" + with open(config_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False) + + return config_path + +def main(): + print("Starting BIDCell segmentation", flush=True) + + # Create temporary working directory + work_dir = Path(tempfile.mkdtemp()) + + try: + # Load input spatial data + print("Loading input spatial data", flush=True) + sdata = sd.read_zarr(par["input"]) + + # Validate required components + if "transcripts" not in sdata.points: + raise ValueError("Input data must contain transcripts in points layer") + + # Get available image keys for morphology + image_keys = list(sdata.images.keys()) + morphology_key = None + for key in ["morphology_mip", "morphology", "image", "dapi"]: + if key in image_keys: + morphology_key = key + break + + if morphology_key is None: + raise ValueError(f"No morphology image found. 
Available keys: {image_keys}") + + print(f"Using morphology image: {morphology_key}", flush=True) + + # Extract genes from spatial data + sdata_genes = sdata.points["transcripts"]["feature_name"].unique().compute().sort_values().tolist() + print(f"Found {len(sdata_genes)} genes in spatial data", flush=True) + + # Extract morphology image + print("Extracting morphology image", flush=True) + if hasattr(sdata.images[morphology_key], 'data'): + img_data = sdata.images[morphology_key].data + else: + img_data = sdata.images[morphology_key] + + if hasattr(img_data, 'values'): + img = img_data.values + else: + img = np.array(img_data) + + # Handle different image formats + if img.ndim == 3: + img = np.squeeze(img) + if img.ndim != 2: + raise ValueError(f"Expected 2D image, got {img.ndim}D") + + # Save morphology image + morphology_path = work_dir / "morphology_mip_pyramidal.tiff" + tifffile.imwrite(morphology_path, img.astype(np.uint16)) + + # Process single-cell reference if provided + if par["single_cell_ref"]: + print("Processing single-cell reference", flush=True) + adata = sc.read_h5ad(par["single_cell_ref"]) + + # Find shared genes + shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] + print(f"Found {len(shared_genes)} shared genes", flush=True) + + if len(shared_genes) < 10: + print("Warning: Very few shared genes found, segmentation may be poor") + + # Filter reference to shared genes + adata = adata[:, adata.var["feature_name"].isin(shared_genes)] + adata.var_names = adata.var["feature_name"].astype(str) + + # Create reference dataframe + if "normalized" in adata.layers: + expr_data = adata[:, shared_genes].layers["normalized"] + elif "X" in adata.layers: + expr_data = adata[:, shared_genes].layers["X"] + else: + expr_data = adata[:, shared_genes].X + + if hasattr(expr_data, 'toarray'): + expr_data = expr_data.toarray() + + sc_ref = pd.DataFrame( + data=expr_data, + columns=shared_genes, + index=range(adata.n_obs) + ) + + # Add cell 
type information + if "cell_type" in adata.obs: + cell_type_col = adata.obs['cell_type'].astype('category') + elif "celltype" in adata.obs: + cell_type_col = adata.obs['celltype'].astype('category') + else: + # Create dummy cell types + print("No cell type information found, using dummy types") + cell_type_col = pd.Categorical(['Type1'] * adata.n_obs) + + sc_ref["ct_idx"] = cell_type_col.cat.codes.values + sc_ref["cell_type"] = cell_type_col.values + sc_ref["atlas"] = "custom" + + # Save reference + sc_ref.to_csv(work_dir / "scref.csv", index=False) + + # Generate markers + print("Generating marker genes", flush=True) + df_pos, df_neg = generate_markers( + sc_ref, + max_overlaps_pos=par["max_overlaps_pos"], + max_overlaps_neg=par["max_overlaps_neg"] + ) + df_pos.to_csv(work_dir / "pos_marker.csv") + df_neg.to_csv(work_dir / "neg_marker.csv") + + # Process transcripts + print("Processing transcripts", flush=True) + transcripts_df = sdata.points["transcripts"].compute() + if par["single_cell_ref"]: + transcripts_df = transcripts_df[transcripts_df["feature_name"].isin(shared_genes)] + + # Ensure correct data types + for col in ['x', 'y', 'z']: + if col in transcripts_df.columns: + transcripts_df[col] = transcripts_df[col].astype(float) + transcripts_df['feature_name'] = transcripts_df['feature_name'].astype(str) + + # Save transcripts + transcripts_df.to_csv(work_dir / "transcript.csv.gz", compression='gzip', index=False) + + # Create BIDCell config + print("Creating BIDCell configuration", flush=True) + config_path = create_bidcell_config( + work_dir, + epochs=par["model_epochs"], + min_size=par["min_cell_size"] + ) + + # Run BIDCell (mock implementation for now) + print("Running BIDCell segmentation", flush=True) + + # For now, create a simple watershed-based segmentation as placeholder + from skimage import filters, segmentation, measure + from scipy import ndimage + + # Simple preprocessing and segmentation + img_blur = filters.gaussian(img.astype(float), sigma=1) 
+ threshold = filters.threshold_otsu(img_blur) + binary = img_blur > threshold + + # Distance transform and watershed + distance = ndimage.distance_transform_edt(binary) + local_maxima = filters.peaks_local_maxima(distance, min_distance=par["min_cell_size"]) + markers = measure.label(local_maxima) + segmentation_mask = segmentation.watershed(-distance, markers, mask=binary) + + # Remove small objects + segmentation_mask = segmentation.clear_border(segmentation_mask) + props = measure.regionprops(segmentation_mask) + for prop in props: + if prop.area < par["min_cell_size"]: + segmentation_mask[segmentation_mask == prop.label] = 0 + + # Relabel to ensure continuous labels + segmentation_mask = measure.label(segmentation_mask > 0) + + print(f"Segmentation completed with {segmentation_mask.max()} cells", flush=True) + + # Create output SpatialData + print("Creating output spatial data", flush=True) + + # Create labels layer + labels = sd.models.Labels2DModel.parse( + segmentation_mask.astype(np.uint32), + dims=('y', 'x') + ) + + # Create minimal table for compatibility + n_cells = int(segmentation_mask.max()) + obs_df = pd.DataFrame({ + "cell_id": [f"cell_{i}" for i in range(1, n_cells + 1)], + "region": ["region_0"] * n_cells + }) + obs_df.index = obs_df.index.astype(str) + + var_df = pd.DataFrame(index=pd.Index([], dtype='object', name='feature_name')) + + table = ad.AnnData( + obs=obs_df, + var=var_df, + X=np.empty((n_cells, 0)) + ) + + # Create output SpatialData + output_sdata = sd.SpatialData( + labels={"segmentation": labels}, + tables={"table": table} + ) + + # Write output + print("Writing output", flush=True) + if os.path.exists(par["output"]): + shutil.rmtree(par["output"]) + output_sdata.write(par["output"]) + + print("BIDCell segmentation completed successfully", flush=True) + + except Exception as e: + logging.error(f"BIDCell segmentation failed: {str(e)}") + sys.exit(1) + + finally: + # Clean up temporary directory + if work_dir.exists(): + 
shutil.rmtree(work_dir) + +if __name__ == "__main__": + main() +VIASHMAIN +python -B "\$tempscript" & +wait "\$!" + +VIASHEOF + + +if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then + # strip viash automount from file paths + + if [ ! -z "$VIASH_PAR_INPUT" ]; then + VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") + fi + if [ ! -z "$VIASH_PAR_OUTPUT" ]; then + VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") + fi + if [ ! -z "$VIASH_PAR_SINGLE_CELL_REF" ]; then + VIASH_PAR_SINGLE_CELL_REF=$(ViashDockerStripAutomount "$VIASH_PAR_SINGLE_CELL_REF") + fi + if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then + VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") + fi + if [ ! -z "$VIASH_META_EXECUTABLE" ]; then + VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") + fi + if [ ! -z "$VIASH_META_CONFIG" ]; then + VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") + fi + if [ ! -z "$VIASH_META_TEMP_DIR" ]; then + VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") + fi +fi + + +# check whether required files exist +if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then + ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
+ exit 1 +fi + + +exit 0 diff --git a/src/methods_segmentation/bidcell/script.py b/src/methods_segmentation/bidcell/script.py new file mode 100644 index 000000000..1429aa8ff --- /dev/null +++ b/src/methods_segmentation/bidcell/script.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python3 + +import spatialdata as sd +import anndata as ad +import numpy as np +import pandas as pd +import scanpy as sc +import tifffile +import cv2 +import natsort +import os +import tempfile +import shutil +import yaml +import logging +import sys +from pathlib import Path + +## VIASH START +par = { + "input": "resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr", + "output": "segmentation.zarr", + "single_cell_ref": None, + "max_overlaps_pos": 4, + "max_overlaps_neg": 15, + "model_epochs": 10, + "min_cell_size": 15 +} +meta = { + "name": "bidcell" +} +## VIASH END + +def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): + """Generate positive and negative marker genes from single-cell reference.""" + n_genes = ref_df.shape[1] - 3 # Exclude ct_idx, cell_type, atlas columns + cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) + n_cell_types = len(cell_types) + + ref_expr = ref_df.iloc[:, :n_genes].to_numpy() + gene_names = ref_df.columns[:n_genes] + ct_idx = ref_df["ct_idx"].to_numpy() + + # Generate negative markers (genes with low expression in specific cell types) + pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) + pct_10 = np.tile(pct_10, (1, n_genes)) + low_expr_true = np.zeros(pct_10.shape) + low_expr_true[ref_expr <= pct_10] = 1 + + low_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + low_expr_true_ct = low_expr_true[rows] + low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) + + overlaps = np.sum(low_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_neg)[0] + low_expr_true_agg[:, too_many] = 0 + df_neg = 
pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) + + # Generate positive markers (genes with high expression in specific cell types) + pct_90 = np.percentile(ref_expr, 90, axis=1, keepdims=True) + pct_90 = np.tile(pct_90, (1, n_genes)) + high_expr_true = np.zeros(pct_90.shape) + high_expr_true[ref_expr >= pct_90] = 1 + + high_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + high_expr_true_ct = high_expr_true[rows] + high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) + + overlaps = np.sum(high_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_pos)[0] + high_expr_true_agg[:, too_many] = 0 + df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) + + return df_pos, df_neg + +def create_bidcell_config(work_dir, epochs=10, min_size=15): + """Create BIDCell configuration YAML file.""" + config = { + "data_path": str(work_dir), + "image_name": "morphology_mip_pyramidal.tiff", + "transcript_file": "transcript.csv.gz", + "pos_marker_file": "pos_marker.csv", + "neg_marker_file": "neg_marker.csv", + "scref_file": "scref.csv", + "output_path": str(work_dir), + "model_params": { + "epochs": epochs, + "min_cell_size": min_size + } + } + + config_path = work_dir / "config.yaml" + with open(config_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False) + + return config_path + +def main(): + print("Starting BIDCell segmentation", flush=True) + + # Create temporary working directory + work_dir = Path(tempfile.mkdtemp()) + + try: + # Load input spatial data + print("Loading input spatial data", flush=True) + sdata = sd.read_zarr(par["input"]) + + # Validate required components + if "transcripts" not in sdata.points: + raise ValueError("Input data must contain transcripts in points layer") + + # Get available image keys for morphology + image_keys = list(sdata.images.keys()) + morphology_key = None + for key in ["morphology_mip", 
"morphology", "image", "dapi"]: + if key in image_keys: + morphology_key = key + break + + if morphology_key is None: + raise ValueError(f"No morphology image found. Available keys: {image_keys}") + + print(f"Using morphology image: {morphology_key}", flush=True) + + # Extract genes from spatial data + sdata_genes = sdata.points["transcripts"]["feature_name"].unique().compute().sort_values().tolist() + print(f"Found {len(sdata_genes)} genes in spatial data", flush=True) + + # Extract morphology image + print("Extracting morphology image", flush=True) + if hasattr(sdata.images[morphology_key], 'data'): + img_data = sdata.images[morphology_key].data + else: + img_data = sdata.images[morphology_key] + + if hasattr(img_data, 'values'): + img = img_data.values + else: + img = np.array(img_data) + + # Handle different image formats + if img.ndim == 3: + img = np.squeeze(img) + if img.ndim != 2: + raise ValueError(f"Expected 2D image, got {img.ndim}D") + + # Save morphology image + morphology_path = work_dir / "morphology_mip_pyramidal.tiff" + tifffile.imwrite(morphology_path, img.astype(np.uint16)) + + # Process single-cell reference if provided + if par["single_cell_ref"]: + print("Processing single-cell reference", flush=True) + adata = sc.read_h5ad(par["single_cell_ref"]) + + # Find shared genes + shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] + print(f"Found {len(shared_genes)} shared genes", flush=True) + + if len(shared_genes) < 10: + print("Warning: Very few shared genes found, segmentation may be poor") + + # Filter reference to shared genes + adata = adata[:, adata.var["feature_name"].isin(shared_genes)] + adata.var_names = adata.var["feature_name"].astype(str) + + # Create reference dataframe + if "normalized" in adata.layers: + expr_data = adata[:, shared_genes].layers["normalized"] + elif "X" in adata.layers: + expr_data = adata[:, shared_genes].layers["X"] + else: + expr_data = adata[:, shared_genes].X + + if 
hasattr(expr_data, 'toarray'): + expr_data = expr_data.toarray() + + sc_ref = pd.DataFrame( + data=expr_data, + columns=shared_genes, + index=range(adata.n_obs) + ) + + # Add cell type information + if "cell_type" in adata.obs: + cell_type_col = adata.obs['cell_type'].astype('category') + elif "celltype" in adata.obs: + cell_type_col = adata.obs['celltype'].astype('category') + else: + # Create dummy cell types + print("No cell type information found, using dummy types") + cell_type_col = pd.Categorical(['Type1'] * adata.n_obs) + + sc_ref["ct_idx"] = cell_type_col.cat.codes.values + sc_ref["cell_type"] = cell_type_col.values + sc_ref["atlas"] = "custom" + + # Save reference + sc_ref.to_csv(work_dir / "scref.csv", index=False) + + # Generate markers + print("Generating marker genes", flush=True) + df_pos, df_neg = generate_markers( + sc_ref, + max_overlaps_pos=par["max_overlaps_pos"], + max_overlaps_neg=par["max_overlaps_neg"] + ) + df_pos.to_csv(work_dir / "pos_marker.csv") + df_neg.to_csv(work_dir / "neg_marker.csv") + + # Process transcripts + print("Processing transcripts", flush=True) + transcripts_df = sdata.points["transcripts"].compute() + if par["single_cell_ref"]: + transcripts_df = transcripts_df[transcripts_df["feature_name"].isin(shared_genes)] + + # Ensure correct data types + for col in ['x', 'y', 'z']: + if col in transcripts_df.columns: + transcripts_df[col] = transcripts_df[col].astype(float) + transcripts_df['feature_name'] = transcripts_df['feature_name'].astype(str) + + # Save transcripts + transcripts_df.to_csv(work_dir / "transcript.csv.gz", compression='gzip', index=False) + + # Create BIDCell config + print("Creating BIDCell configuration", flush=True) + config_path = create_bidcell_config( + work_dir, + epochs=par["model_epochs"], + min_size=par["min_cell_size"] + ) + + # Run BIDCell (mock implementation for now) + print("Running BIDCell segmentation", flush=True) + + # For now, create a simple watershed-based segmentation as placeholder + 
+        # Placeholder watershed segmentation (until real BIDCell inference is wired in).
+        # NOTE: skimage has no `filters.peaks_local_maxima`; the real API is
+        # skimage.feature.peak_local_max, which returns an (N, ndim) array of
+        # peak coordinates rather than a boolean peak image.
+        from skimage import feature, filters, segmentation, measure
+        from scipy import ndimage
+
+        # Simple preprocessing and segmentation
+        img_blur = filters.gaussian(img.astype(float), sigma=1)
+        threshold = filters.threshold_otsu(img_blur)
+        binary = img_blur > threshold
+
+        # Distance transform and watershed
+        distance = ndimage.distance_transform_edt(binary)
+        # Convert peak coordinates into a boolean marker image before
+        # labelling; passing raw coordinates to `measure.label` is invalid.
+        peak_coords = feature.peak_local_max(distance, min_distance=par["min_cell_size"])
+        local_maxima = np.zeros(distance.shape, dtype=bool)
+        local_maxima[tuple(peak_coords.T)] = True
+        markers = measure.label(local_maxima)
+        segmentation_mask = segmentation.watershed(-distance, markers, mask=binary)
+
+        # Remove small objects
+        segmentation_mask = segmentation.clear_border(segmentation_mask)
+        props = measure.regionprops(segmentation_mask)
+        for prop in props:
+            if prop.area < par["min_cell_size"]:
+                segmentation_mask[segmentation_mask == prop.label] = 0
+
+        # Relabel to ensure continuous labels
+        segmentation_mask = measure.label(segmentation_mask > 0)
+
+        print(f"Segmentation completed with {segmentation_mask.max()} cells", flush=True)
+
+        # Create output SpatialData
+        print("Creating output spatial data", flush=True)
+
+        # Create labels layer
+        labels = sd.models.Labels2DModel.parse(
+            segmentation_mask.astype(np.uint32),
+            dims=('y', 'x')
+        )
+
+        # Create minimal table for compatibility
+        n_cells = int(segmentation_mask.max())
+        obs_df = pd.DataFrame({
+            "cell_id": [f"cell_{i}" for i in range(1, n_cells + 1)],
+            "region": ["region_0"] * n_cells
+        })
+        obs_df.index = obs_df.index.astype(str)
+
+        var_df = pd.DataFrame(index=pd.Index([], dtype='object', name='feature_name'))
+
+        table = ad.AnnData(
+            obs=obs_df,
+            var=var_df,
+            X=np.empty((n_cells, 0))
+        )
+
+        # Create output SpatialData
+        output_sdata = sd.SpatialData(
+            labels={"segmentation": labels},
+            tables={"table": table}
+        )
+
+        # Write output
+        print("Writing output", flush=True)
+        if os.path.exists(par["output"]):
+            shutil.rmtree(par["output"])
+        output_sdata.write(par["output"])
+
+        print("BIDCell segmentation completed successfully", flush=True)
+
except Exception as e: + logging.error(f"BIDCell segmentation failed: {str(e)}") + sys.exit(1) + + finally: + # Clean up temporary directory + if work_dir.exists(): + shutil.rmtree(work_dir) + +if __name__ == "__main__": + main() \ No newline at end of file From c3a1b8a67d4c4f37e49414199a1b2499a5c6607c Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Mon, 8 Sep 2025 17:08:19 +0200 Subject: [PATCH 2/8] bidcell edited --- .../bidcell/config.vsh.yaml | 23 +- .../bidcell/output/.build.yaml | 0 .../bidcell/output/.config.vsh.yaml | 498 ------ .../bidcell/output/bidcell | 1585 ----------------- src/methods_segmentation/bidcell/script.py | 414 ++--- 5 files changed, 191 insertions(+), 2329 deletions(-) delete mode 100644 src/methods_segmentation/bidcell/output/.build.yaml delete mode 100644 src/methods_segmentation/bidcell/output/.config.vsh.yaml delete mode 100755 src/methods_segmentation/bidcell/output/bidcell diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml index ddc372737..1e7d3f1af 100644 --- a/src/methods_segmentation/bidcell/config.vsh.yaml +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -15,6 +15,7 @@ arguments: type: file description: "Path to single-cell reference data in H5AD format for marker gene identification" required: false + default: "resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" - name: --max_overlaps_pos type: integer default: 4 @@ -36,10 +37,20 @@ resources: - type: python_script path: script.py +runners: + - type: executable + - type: nextflow + directives: + label: [ midtime, lowcpu, highmem, gpu ] + engines: - type: docker image: openproblems/base_python:1 setup: + - type: python + pypi: "numpy<2.0" + - type: python + pypi: "imgaug<0.4.1" - type: python pypi: spatialdata - type: python @@ -60,12 +71,8 @@ engines: pypi: pyyaml - type: python pypi: scikit-image - __merge__: - - /src/base/setup_txsim_partial.yaml + - type: 
python + pypi: torch + - type: python + pypi: torchvision - type: native - -runners: - - type: executable - - type: nextflow - directives: - label: [ midtime, lowcpu, highmem, gpu ] diff --git a/src/methods_segmentation/bidcell/output/.build.yaml b/src/methods_segmentation/bidcell/output/.build.yaml deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/methods_segmentation/bidcell/output/.config.vsh.yaml b/src/methods_segmentation/bidcell/output/.config.vsh.yaml deleted file mode 100644 index e0009e04f..000000000 --- a/src/methods_segmentation/bidcell/output/.config.vsh.yaml +++ /dev/null @@ -1,498 +0,0 @@ -name: "bidcell" -namespace: "methods_segmentation" -version: "dev" -argument_groups: -- name: "Arguments" - arguments: - - type: "file" - name: "--input" - label: "Raw iST Dataset" - summary: "A spatial transcriptomics dataset, preprocessed for this benchmark." - description: "This dataset contains preprocessed images, labels, points, shapes,\ - \ and tables for spatial transcriptomics data.\n" - info: - format: - type: "spatialdata_zarr" - images: - - type: "object" - name: "image" - description: "The raw image data" - required: true - - type: "object" - name: "image_3D" - description: "The raw 3D image data" - required: false - - type: "object" - name: "he_image" - description: "H&E image data" - required: false - labels: - - type: "object" - name: "cell_labels" - description: "Cell segmentation labels" - required: false - - type: "object" - name: "nucleus_labels" - description: "Cell segmentation labels" - required: false - points: - - type: "dataframe" - name: "transcripts" - description: "Point cloud data of transcripts" - required: true - columns: - - type: "float" - name: "x" - required: true - description: "x-coordinate of the point" - - type: "float" - name: "y" - required: true - description: "y-coordinate of the point" - - type: "float" - name: "z" - required: false - description: "z-coordinate of the point" - - type: "categorical" - 
name: "feature_name" - required: true - description: "Name of the feature" - - type: "integer" - name: "cell_id" - required: false - description: "Unique identifier of the cell" - - type: "integer" - name: "nucleus_id" - required: false - description: "Unique identifier of the nucleus" - - type: "string" - name: "cell_type" - required: false - description: "Cell type of the cell" - - type: "float" - name: "qv" - required: false - description: "Quality value of the point" - - type: "long" - name: "transcript_id" - required: true - description: "Unique identifier of the transcript" - - type: "boolean" - name: "overlaps_nucleus" - required: false - description: "Whether the point overlaps with a nucleus" - shapes: - - type: "dataframe" - name: "cell_boundaries" - description: "Cell boundaries" - required: false - columns: - - type: "object" - name: "geometry" - required: true - description: "Geometry of the cell boundary" - - type: "dataframe" - name: "nucleus_boundaries" - description: "Nucleus boundaries" - required: false - columns: - - type: "object" - name: "geometry" - required: true - description: "Geometry of the nucleus boundary" - tables: - - type: "anndata" - name: "metadata" - description: "Metadata of spatial dataset" - required: true - uns: - - type: "string" - name: "dataset_id" - required: true - description: "A unique identifier for the dataset" - - type: "string" - name: "dataset_name" - required: true - description: "A human-readable name for the dataset" - - type: "string" - name: "dataset_url" - required: true - description: "Link to the original source of the dataset" - - type: "string" - name: "dataset_reference" - required: true - description: "Bibtex reference of the paper in which the dataset was published" - - type: "string" - name: "dataset_summary" - required: true - description: "Short description of the dataset" - - type: "string" - name: "dataset_description" - required: true - description: "Long description of the dataset" - - type: 
"string" - name: "dataset_organism" - required: true - description: "The organism of the sample in the dataset" - - type: "string" - name: "segmentation_id" - required: true - multiple: true - description: "A unique identifier for the segmentation" - obs: - - type: "string" - name: "cell_id" - required: true - description: "A unique identifier for the cell" - var: - - type: "string" - name: "gene_ids" - required: true - description: "Unique identifier for the gene" - - type: "string" - name: "feature_types" - required: true - description: "Type of the feature" - obsm: - - type: "double" - name: "spatial" - required: true - description: "Spatial coordinates of the cell" - coordinate_systems: - - type: "object" - name: "global" - description: "Coordinate system of the replicate" - required: true - example: - - "resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr" - must_exist: true - create_parent: true - required: true - direction: "input" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--output" - label: "Segmentation" - summary: "A segmentation of a spatial transcriptomics dataset" - description: "This dataset contains a segmentation of the spatial transcriptomics\ - \ data.\n" - info: - format: - type: "spatialdata_zarr" - labels: - - type: "object" - name: "segmentation" - description: "Segmentation of the data" - required: true - tables: - - type: "anndata" - name: "table" - description: "AnnData table" - required: true - obs: - - type: "string" - name: "cell_id" - description: "Cell ID" - required: true - - type: "string" - name: "region" - description: "Region" - required: true - example: - - "resources_test/task_ist_preprocessing/mouse_brain_combined/segmentation.zarr" - must_exist: true - create_parent: true - required: true - direction: "output" - multiple: false - multiple_sep: ";" - - type: "file" - name: "--single_cell_ref" - description: "Path to single-cell reference data in H5AD format for marker gene\ - \ 
identification" - info: null - default: - - "task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" - must_exist: true - create_parent: true - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--max_overlaps_pos" - description: "Maximum number of cell types that can share a positive marker" - info: null - default: - - 4 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--max_overlaps_neg" - description: "Maximum number of cell types that can share a negative marker" - info: null - default: - - 15 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--model_epochs" - description: "Number of training epochs for BIDCell model" - info: null - default: - - 10 - required: false - direction: "input" - multiple: false - multiple_sep: ";" - - type: "integer" - name: "--min_cell_size" - description: "Minimum cell size in pixels" - info: null - default: - - 15 - required: false - direction: "input" - multiple: false - multiple_sep: ";" -resources: -- type: "python_script" - path: "script.py" - is_executable: true -label: "BIDCell Segmentation" -summary: "Cell segmentation using BIDCell deep learning approach" -description: "BIDCell is a deep learning method for cell segmentation in spatial transcriptomics\ - \ data that uses both morphology and transcript information." 
-test_resources: -- type: "file" - path: "resources_test/task_ist_preprocessing/mouse_brain_combined" - dest: "resources_test/task_ist_preprocessing/mouse_brain_combined" -- type: "python_script" - path: "run_and_check_output.py" - is_executable: true -- type: "python_script" - path: "check_config.py" - is_executable: true -info: - type: "method" - subtype: "method_segmentation" - type_info: - label: "Segmentation" - summary: "A segmentation of the spatial data into cells" - description: "A segmentation method uses the spatial data to segment an image\ - \ into cells." -status: "enabled" -scope: - image: "public" - target: "public" -repositories: -- type: "github" - name: "openproblems" - repo: "openproblems-bio/openproblems" - tag: "build/main" -- type: "github" - name: "datasets" - repo: "openproblems-bio/datasets" - tag: "build/main" -license: "MIT" -references: - doi: - - "10.1038/s41467-023-44560-w" -links: - repository: "https://github.com/SydneyBioX/BIDCell" - docker_registry: "ghcr.io" - documentation: "https://github.com/SydneyBioX/BIDCell" -runners: -- type: "executable" - id: "executable" - docker_setup_strategy: "ifneedbepullelsecachedbuild" -- type: "nextflow" - id: "nextflow" - directives: - label: - - "midtime" - - "lowcpu" - - "highmem" - - "gpu" - tag: "$id" - auto: - simplifyInput: true - simplifyOutput: false - transcript: false - publish: false - config: - labels: - lowmem: "memory = 20.Gb" - midmem: "memory = 50.Gb" - highmem: "memory = 100.Gb" - lowcpu: "cpus = 5" - midcpu: "cpus = 15" - highcpu: "cpus = 30" - lowtime: "time = 1.h" - midtime: "time = 4.h" - hightime: "time = 8.h" - veryhightime: "time = 24.h" - debug: false - container: "docker" -engines: -- type: "docker" - id: "docker" - image: "openproblems/base_python:1" - namespace_separator: "/" - setup: - - type: "python" - user: false - pypi: - - "squidpy" - - "rasterio" - github: - - "theislab/txsim@dev" - upgrade: true - - type: "python" - user: false - pypi: - - "spatialdata" - 
upgrade: true - - type: "python" - user: false - pypi: - - "scanpy" - upgrade: true - - type: "python" - user: false - pypi: - - "tifffile" - upgrade: true - - type: "python" - user: false - pypi: - - "opencv-python" - upgrade: true - - type: "python" - user: false - pypi: - - "natsort" - upgrade: true - - type: "python" - user: false - pypi: - - "bidcell" - upgrade: true - - type: "python" - user: false - pypi: - - "dask[dataframe]" - upgrade: true - - type: "python" - user: false - pypi: - - "spatialdata-plot" - upgrade: true - - type: "python" - user: false - pypi: - - "pyyaml" - upgrade: true - - type: "python" - user: false - pypi: - - "scikit-image" - upgrade: true - entrypoint: [] - cmd: null -- type: "native" - id: "native" -build_info: - config: "src/methods_segmentation/bidcell/config.vsh.yaml" - runner: "executable" - engine: "docker|native" - output: "output" - executable: "output/bidcell" - viash_version: "0.9.4" - git_commit: "16c6e21ef81efdd062143cad6b79cf992167e1c5" - git_remote: "https://github.com/openproblems-bio/task_ist_preprocessing.git" -package_config: - name: "task_ist_preprocessing" - version: "dev" - label: "iST Preprocessing" - summary: "Benchmarking approaches for preprocessing imaging-based spatial transcriptomics" - description: "Provide a clear and concise description of your task, detailing the\ - \ specific problem it aims\nto solve. Outline the input data types, the expected\ - \ output, and any assumptions or constraints.\nBe sure to explain any terminology\ - \ or concepts that are essential for understanding the task.\n\nExplain the motivation\ - \ behind your proposed task. Describe the biological or computational\nproblem\ - \ you aim to address and why it's important. 
Discuss the current state of research\ - \ in\nthis area and any gaps or challenges that your task could help address.\ - \ This section\nshould convince readers of the significance and relevance of your\ - \ task.\n" - info: - image: "The name of the image file to use for the component on the website." - test_resources: - - type: "s3" - path: "s3://openproblems-data/resources_test/common/2023_10x_mouse_brain_xenium_rep1/" - dest: "resources_test/common/2023_10x_mouse_brain_xenium_rep1/" - - type: "s3" - path: "s3://openproblems-data/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/" - dest: "resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/" - - type: "s3" - path: "s3://openproblems-data/resources_test/task_ist_preprocessing/" - dest: "resources_test/task_ist_preprocessing" - repositories: - - type: "github" - name: "openproblems" - repo: "openproblems-bio/openproblems" - tag: "build/main" - - type: "github" - name: "datasets" - repo: "openproblems-bio/datasets" - tag: "build/main" - viash_version: "0.9.4" - source: "src" - target: "target" - config_mods: - - ".runners[.type == \"nextflow\"].config.labels := { lowmem : \"memory = 20.Gb\"\ - , midmem : \"memory = 50.Gb\", highmem : \"memory = 100.Gb\", lowcpu : \"cpus\ - \ = 5\", midcpu : \"cpus = 15\", highcpu : \"cpus = 30\", lowtime : \"time = 1.h\"\ - , midtime : \"time = 4.h\", hightime : \"time = 8.h\", veryhightime : \"time =\ - \ 24.h\" }\n" - authors: - - name: "Louis Kümmerle" - roles: - - "author" - - "maintainer" - info: - github: "LouisK92" - orcid: "0000-0002-9193-1243" - - name: "Malte D. 
Luecken" - roles: - - "author" - info: - github: "LuckyMD" - orcid: "0000-0001-7464-7921" - - name: "Daniel Strobl" - roles: - - "author" - info: - github: "danielStrobl" - orcid: "0000-0002-5516-7057" - - name: "Robrecht Cannoodt" - roles: - - "author" - info: - github: "rcannood" - orcid: "0000-0003-3641-729X" - keywords: - - "spatial transcriptomics" - - "imaging-based spatial transcriptomics" - - "preprocessing" - license: "MIT" - organization: "openproblems-bio" - links: - repository: "https://github.com/openproblems-bio/task_ist_preprocessing" - docker_registry: "ghcr.io" - issue_tracker: "https://github.com/openproblems-bio/task_ist_preprocessing/issues" diff --git a/src/methods_segmentation/bidcell/output/bidcell b/src/methods_segmentation/bidcell/output/bidcell deleted file mode 100755 index f84745748..000000000 --- a/src/methods_segmentation/bidcell/output/bidcell +++ /dev/null @@ -1,1585 +0,0 @@ -#!/usr/bin/env bash - -# bidcell dev -# -# This wrapper script is auto-generated by viash 0.9.4 and is thus a derivative -# work thereof. This software comes with ABSOLUTELY NO WARRANTY from Data -# Intuitive. -# -# The component may contain files which fall under a different license. The -# authors of this component should specify the license in the header of such -# files, or include a separate license file detailing the licenses of all included -# files. 
- -set -e - -if [ -z "$VIASH_TEMP" ]; then - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$VIASH_TMP} - VIASH_TEMP=${VIASH_TEMP:-$TMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TMP} - VIASH_TEMP=${VIASH_TEMP:-$TEMPDIR} - VIASH_TEMP=${VIASH_TEMP:-$TEMP} - VIASH_TEMP=${VIASH_TEMP:-/tmp} -fi - -# define helper functions -# ViashQuote: put quotes around non flag values -# $1 : unquoted string -# return : possibly quoted string -# examples: -# ViashQuote --foo # returns --foo -# ViashQuote bar # returns 'bar' -# Viashquote --foo=bar # returns --foo='bar' -function ViashQuote { - if [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+=.+$ ]]; then - echo "$1" | sed "s#=\(.*\)#='\1'#" - elif [[ "$1" =~ ^-+[a-zA-Z0-9_\-]+$ ]]; then - echo "$1" - else - echo "'$1'" - fi -} -# ViashRemoveFlags: Remove leading flag -# $1 : string with a possible leading flag -# return : string without possible leading flag -# examples: -# ViashRemoveFlags --foo=bar # returns bar -function ViashRemoveFlags { - echo "$1" | sed 's/^--*[a-zA-Z0-9_\-]*=//' -} -# ViashSourceDir: return the path of a bash file, following symlinks -# usage : ViashSourceDir ${BASH_SOURCE[0]} -# $1 : Should always be set to ${BASH_SOURCE[0]} -# returns : The absolute path of the bash file -function ViashSourceDir { - local source="$1" - while [ -h "$source" ]; do - local dir="$( cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd )" - source="$(readlink "$source")" - [[ $source != /* ]] && source="$dir/$source" - done - cd -P "$( dirname "$source" )" >/dev/null 2>&1 && pwd -} -# ViashFindTargetDir: return the path of the '.build.yaml' file, following symlinks -# usage : ViashFindTargetDir 'ScriptPath' -# $1 : The location from where to start the upward search -# returns : The absolute path of the '.build.yaml' file -function ViashFindTargetDir { - local source="$1" - while [[ "$source" != "" && ! 
-e "$source/.build.yaml" ]]; do - source=${source%/*} - done - echo $source -} -# see https://en.wikipedia.org/wiki/Syslog#Severity_level -VIASH_LOGCODE_EMERGENCY=0 -VIASH_LOGCODE_ALERT=1 -VIASH_LOGCODE_CRITICAL=2 -VIASH_LOGCODE_ERROR=3 -VIASH_LOGCODE_WARNING=4 -VIASH_LOGCODE_NOTICE=5 -VIASH_LOGCODE_INFO=6 -VIASH_LOGCODE_DEBUG=7 -VIASH_VERBOSITY=$VIASH_LOGCODE_NOTICE - -# ViashLog: Log events depending on the verbosity level -# usage: ViashLog 1 alert Oh no something went wrong! -# $1: required verbosity level -# $2: display tag -# $3+: messages to display -# stdout: Your input, prepended by '[$2] '. -function ViashLog { - local required_level="$1" - local display_tag="$2" - shift 2 - if [ $VIASH_VERBOSITY -ge $required_level ]; then - >&2 echo "[$display_tag]" "$@" - fi -} - -# ViashEmergency: log events when the system is unstable -# usage: ViashEmergency Oh no something went wrong. -# stdout: Your input, prepended by '[emergency] '. -function ViashEmergency { - ViashLog $VIASH_LOGCODE_EMERGENCY emergency "$@" -} - -# ViashAlert: log events when actions must be taken immediately (e.g. corrupted system database) -# usage: ViashAlert Oh no something went wrong. -# stdout: Your input, prepended by '[alert] '. -function ViashAlert { - ViashLog $VIASH_LOGCODE_ALERT alert "$@" -} - -# ViashCritical: log events when a critical condition occurs -# usage: ViashCritical Oh no something went wrong. -# stdout: Your input, prepended by '[critical] '. -function ViashCritical { - ViashLog $VIASH_LOGCODE_CRITICAL critical "$@" -} - -# ViashError: log events when an error condition occurs -# usage: ViashError Oh no something went wrong. -# stdout: Your input, prepended by '[error] '. -function ViashError { - ViashLog $VIASH_LOGCODE_ERROR error "$@" -} - -# ViashWarning: log potentially abnormal events -# usage: ViashWarning Something may have gone wrong. -# stdout: Your input, prepended by '[warning] '. 
-function ViashWarning { - ViashLog $VIASH_LOGCODE_WARNING warning "$@" -} - -# ViashNotice: log significant but normal events -# usage: ViashNotice This just happened. -# stdout: Your input, prepended by '[notice] '. -function ViashNotice { - ViashLog $VIASH_LOGCODE_NOTICE notice "$@" -} - -# ViashInfo: log normal events -# usage: ViashInfo This just happened. -# stdout: Your input, prepended by '[info] '. -function ViashInfo { - ViashLog $VIASH_LOGCODE_INFO info "$@" -} - -# ViashDebug: log all events, for debugging purposes -# usage: ViashDebug This just happened. -# stdout: Your input, prepended by '[debug] '. -function ViashDebug { - ViashLog $VIASH_LOGCODE_DEBUG debug "$@" -} - -# find source folder of this component -VIASH_META_RESOURCES_DIR=`ViashSourceDir ${BASH_SOURCE[0]}` - -# find the root of the built components & dependencies -VIASH_TARGET_DIR=`ViashFindTargetDir $VIASH_META_RESOURCES_DIR` - -# define meta fields -VIASH_META_NAME="bidcell" -VIASH_META_FUNCTIONALITY_NAME="bidcell" -VIASH_META_EXECUTABLE="$VIASH_META_RESOURCES_DIR/$VIASH_META_NAME" -VIASH_META_CONFIG="$VIASH_META_RESOURCES_DIR/.config.vsh.yaml" -VIASH_META_TEMP_DIR="$VIASH_TEMP" - - - -# initialise variables -VIASH_MODE='run' -VIASH_ENGINE_ID='docker' - -######## Helper functions for setting up Docker images for viash ######## -# expects: ViashDockerBuild - -# ViashDockerInstallationCheck: check whether Docker is installed correctly -# -# examples: -# ViashDockerInstallationCheck -function ViashDockerInstallationCheck { - ViashDebug "Checking whether Docker is installed" - if [ ! command -v docker &> /dev/null ]; then - ViashCritical "Docker doesn't seem to be installed. See 'https://docs.docker.com/get-docker/' for instructions." - exit 1 - fi - - ViashDebug "Checking whether the Docker daemon is running" - local save=$-; set +e - local docker_version=$(docker version --format '{{.Client.APIVersion}}' 2> /dev/null) - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashCritical "Docker daemon does not seem to be running. Try one of the following:" - ViashCritical "- Try running 'dockerd' in the command line" - ViashCritical "- See https://docs.docker.com/config/daemon/" - exit 1 - fi -} - -# ViashDockerRemoteTagCheck: check whether a Docker image is available -# on a remote. Assumes `docker login` has been performed, if relevant. -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerRemoteTagCheck python:latest -# echo $? # returns '0' -# ViashDockerRemoteTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerRemoteTagCheck { - docker manifest inspect $1 > /dev/null 2> /dev/null -} - -# ViashDockerLocalTagCheck: check whether a Docker image is available locally -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# docker pull python:latest -# ViashDockerLocalTagCheck python:latest -# echo $? # returns '0' -# ViashDockerLocalTagCheck sdaizudceahifu -# echo $? # returns '1' -function ViashDockerLocalTagCheck { - [ -n "$(docker images -q $1)" ] -} - -# ViashDockerPull: pull a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPull python:latest -# echo $? # returns '0' -# ViashDockerPull sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPull { - ViashNotice "Checking if Docker image is available at '$1'" - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker pull $1 && return 0 || return 1 - else - local save=$-; set +e - docker pull $1 2> /dev/null > /dev/null - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashWarning "Could not pull from '$1'. Docker image doesn't exist or is not accessible." 
- fi - return $out - fi -} - -# ViashDockerPush: push a Docker image -# -# $1 : image identifier with format `[registry/]image[:tag]` -# exit code $? : whether or not the image was found -# examples: -# ViashDockerPush python:latest -# echo $? # returns '0' -# ViashDockerPush sdaizudceahifu -# echo $? # returns '1' -function ViashDockerPush { - ViashNotice "Pushing image to '$1'" - local save=$-; set +e - local out - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - docker push $1 - out=$? - else - docker push $1 2> /dev/null > /dev/null - out=$? - fi - [[ $save =~ e ]] && set -e - if [ $out -eq 0 ]; then - ViashNotice "Container '$1' push succeeded." - else - ViashError "Container '$1' push errored. You might not be logged in or have the necessary permissions." - fi - return $out -} - -# ViashDockerPullElseBuild: pull a Docker image, else build it -# -# $1 : image identifier with format `[registry/]image[:tag]` -# ViashDockerBuild : a Bash function which builds a docker image, takes image identifier as argument. -# examples: -# ViashDockerPullElseBuild mynewcomponent -function ViashDockerPullElseBuild { - local save=$-; set +e - ViashDockerPull $1 - local out=$? 
- [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashDockerBuild $@ - fi -} - -# ViashDockerSetup: create a Docker image, according to specified docker setup strategy -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $2 : docker setup strategy, see DockerSetupStrategy.scala -# examples: -# ViashDockerSetup mynewcomponent alwaysbuild -function ViashDockerSetup { - local image_id="$1" - local setup_strategy="$2" - if [ "$setup_strategy" == "alwaysbuild" -o "$setup_strategy" == "build" -o "$setup_strategy" == "b" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspull" -o "$setup_strategy" == "pull" -o "$setup_strategy" == "p" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "alwayspullelsebuild" -o "$setup_strategy" == "pullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayspullelsecachedbuild" -o "$setup_strategy" == "pullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "alwayscachedbuild" -o "$setup_strategy" == "cachedbuild" -o "$setup_strategy" == "cb" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [[ "$setup_strategy" =~ ^ifneedbe ]]; then - local save=$-; set +e - ViashDockerLocalTagCheck $image_id - local outCheck=$? 
- [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashInfo "Image $image_id already exists" - elif [ "$setup_strategy" == "ifneedbebuild" ]; then - ViashDockerBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbecachedbuild" ]; then - ViashDockerBuild $image_id $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepull" ]; then - ViashDockerPull $image_id - elif [ "$setup_strategy" == "ifneedbepullelsebuild" ]; then - ViashDockerPullElseBuild $image_id --no-cache $(ViashDockerBuildArgs "$engine_id") - elif [ "$setup_strategy" == "ifneedbepullelsecachedbuild" ]; then - ViashDockerPullElseBuild $image_id $(ViashDockerBuildArgs "$engine_id") - else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi - elif [ "$setup_strategy" == "push" -o "$setup_strategy" == "forcepush" -o "$setup_strategy" == "alwayspush" ]; then - ViashDockerPush "$image_id" - elif [ "$setup_strategy" == "pushifnotpresent" -o "$setup_strategy" == "gentlepush" -o "$setup_strategy" == "maybepush" ]; then - local save=$-; set +e - ViashDockerRemoteTagCheck $image_id - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -eq 0 ]; then - ViashNotice "Container '$image_id' exists, doing nothing." - else - ViashNotice "Container '$image_id' does not yet exist." - ViashDockerPush "$image_id" - fi - elif [ "$setup_strategy" == "donothing" -o "$setup_strategy" == "meh" ]; then - ViashNotice "Skipping setup." 
- else - ViashError "Unrecognised Docker strategy: $setup_strategy" - exit 1 - fi -} - -# ViashDockerCheckCommands: Check whether a docker container has the required commands -# -# $1 : image identifier with format `[registry/]image[:tag]` -# $@ : commands to verify being present -# examples: -# ViashDockerCheckCommands bash:4.0 bash ps foo -function ViashDockerCheckCommands { - local image_id="$1" - shift 1 - local commands="$@" - local save=$-; set +e - local missing # mark 'missing' as local in advance, otherwise the exit code of the command will be missing and always be '0' - missing=$(docker run --rm --entrypoint=sh "$image_id" -c "for command in $commands; do command -v \$command >/dev/null 2>&1; if [ \$? -ne 0 ]; then echo \$command; exit 1; fi; done") - local outCheck=$? - [[ $save =~ e ]] && set -e - if [ $outCheck -ne 0 ]; then - ViashError "Docker container '$image_id' does not contain command '$missing'." - exit 1 - fi -} - -# ViashDockerBuild: build a docker image -# $1 : image identifier with format `[registry/]image[:tag]` -# $... : additional arguments to pass to docker build -# $VIASH_META_TEMP_DIR : temporary directory to store dockerfile & optional resources in -# $VIASH_META_NAME : name of the component -# $VIASH_META_RESOURCES_DIR : directory containing the resources -# $VIASH_VERBOSITY : verbosity level -# exit code $? 
: whether or not the image was built successfully -function ViashDockerBuild { - local image_id="$1" - shift 1 - - # create temporary directory to store dockerfile & optional resources in - local tmpdir=$(mktemp -d "$VIASH_META_TEMP_DIR/dockerbuild-$VIASH_META_NAME-XXXXXX") - local dockerfile="$tmpdir/Dockerfile" - function clean_up { - rm -rf "$tmpdir" - } - trap clean_up EXIT - - # store dockerfile and resources - ViashDockerfile "$VIASH_ENGINE_ID" > "$dockerfile" - - # generate the build command - local docker_build_cmd="docker build -t '$image_id' $@ '$VIASH_META_RESOURCES_DIR' -f '$dockerfile'" - - # build the container - ViashNotice "Building container '$image_id' with Dockerfile" - ViashInfo "$docker_build_cmd" - local save=$-; set +e - if [ $VIASH_VERBOSITY -ge $VIASH_LOGCODE_INFO ]; then - eval $docker_build_cmd - else - eval $docker_build_cmd &> "$tmpdir/docker_build.log" - fi - - # check exit code - local out=$? - [[ $save =~ e ]] && set -e - if [ $out -ne 0 ]; then - ViashError "Error occurred while building container '$image_id'" - if [ $VIASH_VERBOSITY -lt $VIASH_LOGCODE_INFO ]; then - ViashError "Transcript: --------------------------------" - cat "$tmpdir/docker_build.log" - ViashError "End of transcript --------------------------" - fi - exit 1 - fi -} - -######## End of helper functions for setting up Docker images for viash ######## - -# ViashDockerFile: print the dockerfile to stdout -# $1 : engine identifier -# return : dockerfile required to run this component -# examples: -# ViashDockerFile -function ViashDockerfile { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - cat << 'VIASHDOCKER' -FROM openproblems/base_python:1 -ENTRYPOINT [] -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "squidpy" "rasterio" && \ - pip install --upgrade --no-cache-dir "git+https://github.com/theislab/txsim@dev" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "spatialdata" - -RUN pip install 
--upgrade pip && \ - pip install --upgrade --no-cache-dir "scanpy" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "tifffile" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "opencv-python" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "natsort" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "bidcell" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "dask[dataframe]" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "spatialdata-plot" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "pyyaml" - -RUN pip install --upgrade pip && \ - pip install --upgrade --no-cache-dir "scikit-image" - -LABEL org.opencontainers.image.description="Companion container for running component methods_segmentation bidcell" -LABEL org.opencontainers.image.created="2025-09-08T16:27:18+02:00" -LABEL org.opencontainers.image.source="https://github.com/SydneyBioX/BIDCell" -LABEL org.opencontainers.image.revision="16c6e21ef81efdd062143cad6b79cf992167e1c5" -LABEL org.opencontainers.image.version="dev" - -VIASHDOCKER - fi -} - -# ViashDockerBuildArgs: return the arguments to pass to docker build -# $1 : engine identifier -# return : arguments to pass to docker build -function ViashDockerBuildArgs { - local engine_id="$1" - - if [[ "$engine_id" == "docker" ]]; then - echo "" - fi -} - -# ViashAbsolutePath: generate absolute path from relative path -# borrowed from https://stackoverflow.com/a/21951256 -# $1 : relative filename -# return : absolute path -# examples: -# ViashAbsolutePath some_file.txt # returns /path/to/some_file.txt -# ViashAbsolutePath /foo/bar/.. # returns /foo -function ViashAbsolutePath { - local thePath - local parr - local outp - local len - if [[ ! 
"$1" =~ ^/ ]]; then - thePath="$PWD/$1" - else - thePath="$1" - fi - echo "$thePath" | ( - IFS=/ - read -a parr - declare -a outp - for i in "${parr[@]}"; do - case "$i" in - ''|.) continue ;; - ..) - len=${#outp[@]} - if ((len==0)); then - continue - else - unset outp[$((len-1))] - fi - ;; - *) - len=${#outp[@]} - outp[$len]="$i" - ;; - esac - done - echo /"${outp[*]}" - ) -} -# ViashDockerAutodetectMount: auto configuring docker mounts from parameters -# $1 : The parameter value -# returns : New parameter -# $VIASH_DIRECTORY_MOUNTS : Added another parameter to be passed to docker -# $VIASH_DOCKER_AUTOMOUNT_PREFIX : The prefix to be used for the automounts -# examples: -# ViashDockerAutodetectMount /path/to/bar # returns '/viash_automount/path/to/bar' -# ViashDockerAutodetectMountArg /path/to/bar # returns '--volume="/path/to:/viash_automount/path/to"' -function ViashDockerAutodetectMount { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - if [ -z "$base_name" ]; then - echo "$mount_target" - else - echo "$mount_target/$base_name" - fi -} -function ViashDockerAutodetectMountArg { - local abs_path=$(ViashAbsolutePath "$1") - local mount_source - local base_name - if [ -d "$abs_path" ]; then - mount_source="$abs_path" - base_name="" - else - mount_source=`dirname "$abs_path"` - base_name=`basename "$abs_path"` - fi - local mount_target="$VIASH_DOCKER_AUTOMOUNT_PREFIX$mount_source" - ViashDebug "ViashDockerAutodetectMountArg $1 -> $mount_source -> $mount_target" - echo "--volume=\"$mount_source:$mount_target\"" -} -function ViashDockerStripAutomount { - local abs_path=$(ViashAbsolutePath "$1") - echo "${abs_path#$VIASH_DOCKER_AUTOMOUNT_PREFIX}" -} -# initialise variables -VIASH_DIRECTORY_MOUNTS=() - -# configure 
default docker automount prefix if it is unset -if [ -z "${VIASH_DOCKER_AUTOMOUNT_PREFIX+x}" ]; then - VIASH_DOCKER_AUTOMOUNT_PREFIX="/viash_automount" -fi - -# initialise docker variables -VIASH_DOCKER_RUN_ARGS=(-i --rm) - - -# ViashHelp: Display helpful explanation about this executable -function ViashHelp { - echo "bidcell dev" - echo "" - echo "BIDCell is a deep learning method for cell segmentation in spatial" - echo "transcriptomics data that uses both morphology and transcript information." - echo "" - echo "Arguments:" - echo " --input" - echo " type: file, required parameter, file must exist" - echo " example:" - echo "resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr" - echo " This dataset contains preprocessed images, labels, points, shapes, and" - echo " tables for spatial transcriptomics data." - echo "" - echo " --output" - echo " type: file, required parameter, output, file must exist" - echo " example:" - echo "resources_test/task_ist_preprocessing/mouse_brain_combined/segmentation.zarr" - echo " This dataset contains a segmentation of the spatial transcriptomics" - echo " data." 
- echo "" - echo " --single_cell_ref" - echo " type: file, file must exist" - echo " default:" - echo "task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" - echo " Path to single-cell reference data in H5AD format for marker gene" - echo " identification" - echo "" - echo " --max_overlaps_pos" - echo " type: integer" - echo " default: 4" - echo " Maximum number of cell types that can share a positive marker" - echo "" - echo " --max_overlaps_neg" - echo " type: integer" - echo " default: 15" - echo " Maximum number of cell types that can share a negative marker" - echo "" - echo " --model_epochs" - echo " type: integer" - echo " default: 10" - echo " Number of training epochs for BIDCell model" - echo "" - echo " --min_cell_size" - echo " type: integer" - echo " default: 15" - echo " Minimum cell size in pixels" - echo "" - echo "Viash built in Computational Requirements:" - echo " ---cpus=INT" - echo " Number of CPUs to use" - echo " ---memory=STRING" - echo " Amount of memory to use. Examples: 4GB, 3MiB." - echo "" - echo "Viash built in Docker:" - echo " ---setup=STRATEGY" - echo " Setup the docker container. Options are: alwaysbuild, alwayscachedbuild, ifneedbebuild, ifneedbecachedbuild, alwayspull, alwayspullelsebuild, alwayspullelsecachedbuild, ifneedbepull, ifneedbepullelsebuild, ifneedbepullelsecachedbuild, push, pushifnotpresent, donothing." - echo " Default: ifneedbepullelsecachedbuild" - echo " ---dockerfile" - echo " Print the dockerfile to stdout." - echo " ---docker_run_args=ARG" - echo " Provide runtime arguments to Docker. See the documentation on \`docker run\` for more information." - echo " ---docker_image_id" - echo " Print the docker image id to stdout." - echo " ---debug" - echo " Enter the docker container for debugging purposes." - echo "" - echo "Viash built in Engines:" - echo " ---engine=ENGINE_ID" - echo " Specify the engine to use. Options are: docker, native." 
- echo " Default: docker" -} - -# initialise array -VIASH_POSITIONAL_ARGS='' - -while [[ $# -gt 0 ]]; do - case "$1" in - -h|--help) - ViashHelp - exit - ;; - ---v|---verbose) - let "VIASH_VERBOSITY=VIASH_VERBOSITY+1" - shift 1 - ;; - ---verbosity) - VIASH_VERBOSITY="$2" - shift 2 - ;; - ---verbosity=*) - VIASH_VERBOSITY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - --version) - echo "bidcell dev" - exit - ;; - --input) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --input. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --input=*) - [ -n "$VIASH_PAR_INPUT" ] && ViashError Bad arguments for option \'--input=*\': \'$VIASH_PAR_INPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_INPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --output) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --output. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --output=*) - [ -n "$VIASH_PAR_OUTPUT" ] && ViashError Bad arguments for option \'--output=*\': \'$VIASH_PAR_OUTPUT\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_OUTPUT=$(ViashRemoveFlags "$1") - shift 1 - ;; - --single_cell_ref) - [ -n "$VIASH_PAR_SINGLE_CELL_REF" ] && ViashError Bad arguments for option \'--single_cell_ref\': \'$VIASH_PAR_SINGLE_CELL_REF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SINGLE_CELL_REF="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --single_cell_ref. 
Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --single_cell_ref=*) - [ -n "$VIASH_PAR_SINGLE_CELL_REF" ] && ViashError Bad arguments for option \'--single_cell_ref=*\': \'$VIASH_PAR_SINGLE_CELL_REF\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_SINGLE_CELL_REF=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_overlaps_pos) - [ -n "$VIASH_PAR_MAX_OVERLAPS_POS" ] && ViashError Bad arguments for option \'--max_overlaps_pos\': \'$VIASH_PAR_MAX_OVERLAPS_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_OVERLAPS_POS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_overlaps_pos. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_overlaps_pos=*) - [ -n "$VIASH_PAR_MAX_OVERLAPS_POS" ] && ViashError Bad arguments for option \'--max_overlaps_pos=*\': \'$VIASH_PAR_MAX_OVERLAPS_POS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_OVERLAPS_POS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --max_overlaps_neg) - [ -n "$VIASH_PAR_MAX_OVERLAPS_NEG" ] && ViashError Bad arguments for option \'--max_overlaps_neg\': \'$VIASH_PAR_MAX_OVERLAPS_NEG\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MAX_OVERLAPS_NEG="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --max_overlaps_neg. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --max_overlaps_neg=*) - [ -n "$VIASH_PAR_MAX_OVERLAPS_NEG" ] && ViashError Bad arguments for option \'--max_overlaps_neg=*\': \'$VIASH_PAR_MAX_OVERLAPS_NEG\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MAX_OVERLAPS_NEG=$(ViashRemoveFlags "$1") - shift 1 - ;; - --model_epochs) - [ -n "$VIASH_PAR_MODEL_EPOCHS" ] && ViashError Bad arguments for option \'--model_epochs\': \'$VIASH_PAR_MODEL_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_EPOCHS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --model_epochs. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --model_epochs=*) - [ -n "$VIASH_PAR_MODEL_EPOCHS" ] && ViashError Bad arguments for option \'--model_epochs=*\': \'$VIASH_PAR_MODEL_EPOCHS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MODEL_EPOCHS=$(ViashRemoveFlags "$1") - shift 1 - ;; - --min_cell_size) - [ -n "$VIASH_PAR_MIN_CELL_SIZE" ] && ViashError Bad arguments for option \'--min_cell_size\': \'$VIASH_PAR_MIN_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_PAR_MIN_CELL_SIZE="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to --min_cell_size. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - --min_cell_size=*) - [ -n "$VIASH_PAR_MIN_CELL_SIZE" ] && ViashError Bad arguments for option \'--min_cell_size=*\': \'$VIASH_PAR_MIN_CELL_SIZE\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_PAR_MIN_CELL_SIZE=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---engine) - VIASH_ENGINE_ID="$2" - shift 2 - ;; - ---engine=*) - VIASH_ENGINE_ID="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---setup) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$2" - shift 2 - ;; - ---setup=*) - VIASH_MODE='setup' - VIASH_SETUP_STRATEGY="$(ViashRemoveFlags "$1")" - shift 1 - ;; - ---dockerfile) - VIASH_MODE='dockerfile' - shift 1 - ;; - ---docker_run_args) - VIASH_DOCKER_RUN_ARGS+=("$2") - shift 2 - ;; - ---docker_run_args=*) - VIASH_DOCKER_RUN_ARGS+=("$(ViashRemoveFlags "$1")") - shift 1 - ;; - ---docker_image_id) - VIASH_MODE='docker_image_id' - shift 1 - ;; - ---debug) - VIASH_MODE='debug' - shift 1 - ;; - ---cpus) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---cpus. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---cpus=*) - [ -n "$VIASH_META_CPUS" ] && ViashError Bad arguments for option \'---cpus=*\': \'$VIASH_META_CPUS\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_CPUS=$(ViashRemoveFlags "$1") - shift 1 - ;; - ---memory) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. && exit 1 - VIASH_META_MEMORY="$2" - [ $# -lt 2 ] && ViashError Not enough arguments passed to ---memory. Use "--help" to get more information on the parameters. && exit 1 - shift 2 - ;; - ---memory=*) - [ -n "$VIASH_META_MEMORY" ] && ViashError Bad arguments for option \'---memory=*\': \'$VIASH_META_MEMORY\' \& \'$2\' - you should provide exactly one argument for this option. 
&& exit 1 - VIASH_META_MEMORY=$(ViashRemoveFlags "$1") - shift 1 - ;; - *) # positional arg or unknown option - # since the positional args will be eval'd, can we always quote, instead of using ViashQuote - VIASH_POSITIONAL_ARGS="$VIASH_POSITIONAL_ARGS '$1'" - [[ $1 == -* ]] && ViashWarning $1 looks like a parameter but is not a defined parameter and will instead be treated as a positional argument. Use "--help" to get more information on the parameters. - shift # past argument - ;; - esac -done - -# parse positional parameters -eval set -- $VIASH_POSITIONAL_ARGS - - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - VIASH_ENGINE_TYPE='native' -elif [ "$VIASH_ENGINE_ID" == "docker" ] ; then - VIASH_ENGINE_TYPE='docker' -else - ViashError "Engine '$VIASH_ENGINE_ID' is not recognized. Options are: docker, native." - exit 1 -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # check if docker is installed properly - ViashDockerInstallationCheck - - # determine docker image id - if [[ "$VIASH_ENGINE_ID" == 'docker' ]]; then - VIASH_DOCKER_IMAGE_ID='ghcr.io/openproblems-bio/task_ist_preprocessing/methods_segmentation/bidcell:dev' - fi - - # print dockerfile - if [ "$VIASH_MODE" == "dockerfile" ]; then - ViashDockerfile "$VIASH_ENGINE_ID" - exit 0 - - elif [ "$VIASH_MODE" == "docker_image_id" ]; then - echo "$VIASH_DOCKER_IMAGE_ID" - exit 0 - - # enter docker container - elif [[ "$VIASH_MODE" == "debug" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} -v '$(pwd)':/pwd --workdir /pwd -t $VIASH_DOCKER_IMAGE_ID" - ViashNotice "+ $VIASH_CMD" - eval $VIASH_CMD - exit - - # build docker image - elif [ "$VIASH_MODE" == "setup" ]; then - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" "$VIASH_SETUP_STRATEGY" - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' - exit 0 - fi - - # check if docker image exists - ViashDockerSetup "$VIASH_DOCKER_IMAGE_ID" ifneedbepullelsecachedbuild - ViashDockerCheckCommands "$VIASH_DOCKER_IMAGE_ID" 'bash' -fi - -# 
setting computational defaults - -# helper function for parsing memory strings -function ViashMemoryAsBytes { - local memory=`echo "$1" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]'` - local memory_regex='^([0-9]+)([kmgtp]i?b?|b)$' - if [[ $memory =~ $memory_regex ]]; then - local number=${memory/[^0-9]*/} - local symbol=${memory/*[0-9]/} - - case $symbol in - b) memory_b=$number ;; - kb|k) memory_b=$(( $number * 1000 )) ;; - mb|m) memory_b=$(( $number * 1000 * 1000 )) ;; - gb|g) memory_b=$(( $number * 1000 * 1000 * 1000 )) ;; - tb|t) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 )) ;; - pb|p) memory_b=$(( $number * 1000 * 1000 * 1000 * 1000 * 1000 )) ;; - kib|ki) memory_b=$(( $number * 1024 )) ;; - mib|mi) memory_b=$(( $number * 1024 * 1024 )) ;; - gib|gi) memory_b=$(( $number * 1024 * 1024 * 1024 )) ;; - tib|ti) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 )) ;; - pib|pi) memory_b=$(( $number * 1024 * 1024 * 1024 * 1024 * 1024 )) ;; - esac - echo "$memory_b" - fi -} -# compute memory in different units -if [ ! -z ${VIASH_META_MEMORY+x} ]; then - VIASH_META_MEMORY_B=`ViashMemoryAsBytes $VIASH_META_MEMORY` - # do not define other variables if memory_b is an empty string - if [ ! 
-z "$VIASH_META_MEMORY_B" ]; then - VIASH_META_MEMORY_KB=$(( ($VIASH_META_MEMORY_B+999) / 1000 )) - VIASH_META_MEMORY_MB=$(( ($VIASH_META_MEMORY_KB+999) / 1000 )) - VIASH_META_MEMORY_GB=$(( ($VIASH_META_MEMORY_MB+999) / 1000 )) - VIASH_META_MEMORY_TB=$(( ($VIASH_META_MEMORY_GB+999) / 1000 )) - VIASH_META_MEMORY_PB=$(( ($VIASH_META_MEMORY_TB+999) / 1000 )) - VIASH_META_MEMORY_KIB=$(( ($VIASH_META_MEMORY_B+1023) / 1024 )) - VIASH_META_MEMORY_MIB=$(( ($VIASH_META_MEMORY_KIB+1023) / 1024 )) - VIASH_META_MEMORY_GIB=$(( ($VIASH_META_MEMORY_MIB+1023) / 1024 )) - VIASH_META_MEMORY_TIB=$(( ($VIASH_META_MEMORY_GIB+1023) / 1024 )) - VIASH_META_MEMORY_PIB=$(( ($VIASH_META_MEMORY_TIB+1023) / 1024 )) - else - # unset memory if string is empty - unset $VIASH_META_MEMORY_B - fi -fi -# unset nproc if string is empty -if [ -z "$VIASH_META_CPUS" ]; then - unset $VIASH_META_CPUS -fi - - -# check whether required parameters exist -if [ -z ${VIASH_PAR_INPUT+x} ]; then - ViashError '--input' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_PAR_OUTPUT+x} ]; then - ViashError '--output' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_NAME+x} ]; then - ViashError 'name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then - ViashError 'functionality_name' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_RESOURCES_DIR+x} ]; then - ViashError 'resources_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_EXECUTABLE+x} ]; then - ViashError 'executable' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_CONFIG+x} ]; then - ViashError 'config' is a required argument. 
Use "--help" to get more information on the parameters. - exit 1 -fi -if [ -z ${VIASH_META_TEMP_DIR+x} ]; then - ViashError 'temp_dir' is a required argument. Use "--help" to get more information on the parameters. - exit 1 -fi - -# filling in defaults -if [ -z ${VIASH_PAR_SINGLE_CELL_REF+x} ]; then - VIASH_PAR_SINGLE_CELL_REF="task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" -fi -if [ -z ${VIASH_PAR_MAX_OVERLAPS_POS+x} ]; then - VIASH_PAR_MAX_OVERLAPS_POS="4" -fi -if [ -z ${VIASH_PAR_MAX_OVERLAPS_NEG+x} ]; then - VIASH_PAR_MAX_OVERLAPS_NEG="15" -fi -if [ -z ${VIASH_PAR_MODEL_EPOCHS+x} ]; then - VIASH_PAR_MODEL_EPOCHS="10" -fi -if [ -z ${VIASH_PAR_MIN_CELL_SIZE+x} ]; then - VIASH_PAR_MIN_CELL_SIZE="15" -fi - -# check whether required files exist -if [ ! -z "$VIASH_PAR_INPUT" ] && [ ! -e "$VIASH_PAR_INPUT" ]; then - ViashError "Input file '$VIASH_PAR_INPUT' does not exist." - exit 1 -fi -if [ ! -z "$VIASH_PAR_SINGLE_CELL_REF" ] && [ ! -e "$VIASH_PAR_SINGLE_CELL_REF" ]; then - ViashError "Input file '$VIASH_PAR_SINGLE_CELL_REF' does not exist." - exit 1 -fi - -# check whether parameters values are of the right type -if [[ -n "$VIASH_PAR_MAX_OVERLAPS_POS" ]]; then - if ! [[ "$VIASH_PAR_MAX_OVERLAPS_POS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_overlaps_pos' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MAX_OVERLAPS_NEG" ]]; then - if ! [[ "$VIASH_PAR_MAX_OVERLAPS_NEG" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--max_overlaps_neg' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MODEL_EPOCHS" ]]; then - if ! [[ "$VIASH_PAR_MODEL_EPOCHS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--model_epochs' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_PAR_MIN_CELL_SIZE" ]]; then - if ! 
[[ "$VIASH_PAR_MIN_CELL_SIZE" =~ ^[-+]?[0-9]+$ ]]; then - ViashError '--min_cell_size' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_CPUS" ]]; then - if ! [[ "$VIASH_META_CPUS" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'cpus' has to be an integer. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_B" ]]; then - if ! [[ "$VIASH_META_MEMORY_B" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_b' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MB" ]]; then - if ! [[ "$VIASH_META_MEMORY_MB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pb' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_KIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_KIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_kib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_MIB" ]]; then - if ! 
[[ "$VIASH_META_MEMORY_MIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_mib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_GIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_GIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_gib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_TIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_TIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_tib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi -if [[ -n "$VIASH_META_MEMORY_PIB" ]]; then - if ! [[ "$VIASH_META_MEMORY_PIB" =~ ^[-+]?[0-9]+$ ]]; then - ViashError 'memory_pib' has to be a long. Use "--help" to get more information on the parameters. - exit 1 - fi -fi - -# create parent directories of output files, if so desired -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -d "$(dirname "$VIASH_PAR_OUTPUT")" ]; then - mkdir -p "$(dirname "$VIASH_PAR_OUTPUT")" -fi - -if [ "$VIASH_ENGINE_ID" == "native" ] ; then - if [ "$VIASH_MODE" == "run" ]; then - VIASH_CMD="bash" - else - ViashError "Engine '$VIASH_ENGINE_ID' does not support mode '$VIASH_MODE'." - exit 1 - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # detect volumes from file arguments - VIASH_CHOWN_VARS=() -if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_INPUT")" ) - VIASH_PAR_INPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_INPUT") -fi -if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_OUTPUT")" ) - VIASH_PAR_OUTPUT=$(ViashDockerAutodetectMount "$VIASH_PAR_OUTPUT") - VIASH_CHOWN_VARS+=( "$VIASH_PAR_OUTPUT" ) -fi -if [ ! 
-z "$VIASH_PAR_SINGLE_CELL_REF" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_PAR_SINGLE_CELL_REF")" ) - VIASH_PAR_SINGLE_CELL_REF=$(ViashDockerAutodetectMount "$VIASH_PAR_SINGLE_CELL_REF") -fi -if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_RESOURCES_DIR")" ) - VIASH_META_RESOURCES_DIR=$(ViashDockerAutodetectMount "$VIASH_META_RESOURCES_DIR") -fi -if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_EXECUTABLE")" ) - VIASH_META_EXECUTABLE=$(ViashDockerAutodetectMount "$VIASH_META_EXECUTABLE") -fi -if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_CONFIG")" ) - VIASH_META_CONFIG=$(ViashDockerAutodetectMount "$VIASH_META_CONFIG") -fi -if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_DIRECTORY_MOUNTS+=( "$(ViashDockerAutodetectMountArg "$VIASH_META_TEMP_DIR")" ) - VIASH_META_TEMP_DIR=$(ViashDockerAutodetectMount "$VIASH_META_TEMP_DIR") -fi - - # get unique mounts - VIASH_UNIQUE_MOUNTS=($(for val in "${VIASH_DIRECTORY_MOUNTS[@]}"; do echo "$val"; done | sort -u)) -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # change file ownership - function ViashPerformChown { - if (( ${#VIASH_CHOWN_VARS[@]} )); then - set +e - VIASH_CMD="docker run --entrypoint=bash --rm ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID -c 'chown $(id -u):$(id -g) --silent --recursive ${VIASH_CHOWN_VARS[@]}'" - ViashDebug "+ $VIASH_CMD" - eval $VIASH_CMD - set -e - fi - } - trap ViashPerformChown EXIT -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # helper function for filling in extra docker args - if [ ! -z "$VIASH_META_MEMORY_B" ]; then - VIASH_DOCKER_RUN_ARGS+=("--memory=${VIASH_META_MEMORY_B}") - fi - if [ ! 
-z "$VIASH_META_CPUS" ]; then - VIASH_DOCKER_RUN_ARGS+=("--cpus=${VIASH_META_CPUS}") - fi -fi - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - VIASH_CMD="docker run --entrypoint=bash ${VIASH_DOCKER_RUN_ARGS[@]} ${VIASH_UNIQUE_MOUNTS[@]} $VIASH_DOCKER_IMAGE_ID" -fi - - -# set dependency paths - - -ViashDebug "Running command: $(echo $VIASH_CMD)" -cat << VIASHEOF | eval $VIASH_CMD -set -e -tempscript=\$(mktemp "$VIASH_META_TEMP_DIR/viash-run-bidcell-XXXXXX").py -function clean_up { - rm "\$tempscript" -} -function interrupt { - echo -e "\nCTRL-C Pressed..." - exit 1 -} -trap clean_up EXIT -trap interrupt INT SIGINT -cat > "\$tempscript" << 'VIASHMAIN' -#!/usr/bin/env python3 - -import spatialdata as sd -import anndata as ad -import numpy as np -import pandas as pd -import scanpy as sc -import tifffile -import cv2 -import natsort -import os -import tempfile -import shutil -import yaml -import logging -import sys -from pathlib import Path - -## VIASH START -# The following code has been auto-generated by Viash. -par = { - 'input': $( if [ ! -z ${VIASH_PAR_INPUT+x} ]; then echo "r'${VIASH_PAR_INPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'output': $( if [ ! -z ${VIASH_PAR_OUTPUT+x} ]; then echo "r'${VIASH_PAR_OUTPUT//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'single_cell_ref': $( if [ ! -z ${VIASH_PAR_SINGLE_CELL_REF+x} ]; then echo "r'${VIASH_PAR_SINGLE_CELL_REF//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'max_overlaps_pos': $( if [ ! -z ${VIASH_PAR_MAX_OVERLAPS_POS+x} ]; then echo "int(r'${VIASH_PAR_MAX_OVERLAPS_POS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'max_overlaps_neg': $( if [ ! -z ${VIASH_PAR_MAX_OVERLAPS_NEG+x} ]; then echo "int(r'${VIASH_PAR_MAX_OVERLAPS_NEG//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'model_epochs': $( if [ ! -z ${VIASH_PAR_MODEL_EPOCHS+x} ]; then echo "int(r'${VIASH_PAR_MODEL_EPOCHS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'min_cell_size': $( if [ ! 
-z ${VIASH_PAR_MIN_CELL_SIZE+x} ]; then echo "int(r'${VIASH_PAR_MIN_CELL_SIZE//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -meta = { - 'name': $( if [ ! -z ${VIASH_META_NAME+x} ]; then echo "r'${VIASH_META_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'functionality_name': $( if [ ! -z ${VIASH_META_FUNCTIONALITY_NAME+x} ]; then echo "r'${VIASH_META_FUNCTIONALITY_NAME//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'resources_dir': $( if [ ! -z ${VIASH_META_RESOURCES_DIR+x} ]; then echo "r'${VIASH_META_RESOURCES_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'executable': $( if [ ! -z ${VIASH_META_EXECUTABLE+x} ]; then echo "r'${VIASH_META_EXECUTABLE//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'config': $( if [ ! -z ${VIASH_META_CONFIG+x} ]; then echo "r'${VIASH_META_CONFIG//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'temp_dir': $( if [ ! -z ${VIASH_META_TEMP_DIR+x} ]; then echo "r'${VIASH_META_TEMP_DIR//\'/\'\"\'\"r\'}'"; else echo None; fi ), - 'cpus': $( if [ ! -z ${VIASH_META_CPUS+x} ]; then echo "int(r'${VIASH_META_CPUS//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_b': $( if [ ! -z ${VIASH_META_MEMORY_B+x} ]; then echo "int(r'${VIASH_META_MEMORY_B//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kb': $( if [ ! -z ${VIASH_META_MEMORY_KB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mb': $( if [ ! -z ${VIASH_META_MEMORY_MB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gb': $( if [ ! -z ${VIASH_META_MEMORY_GB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tb': $( if [ ! -z ${VIASH_META_MEMORY_TB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pb': $( if [ ! -z ${VIASH_META_MEMORY_PB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_kib': $( if [ ! 
-z ${VIASH_META_MEMORY_KIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_KIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_mib': $( if [ ! -z ${VIASH_META_MEMORY_MIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_MIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_gib': $( if [ ! -z ${VIASH_META_MEMORY_GIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_GIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_tib': $( if [ ! -z ${VIASH_META_MEMORY_TIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_TIB//\'/\'\"\'\"r\'}')"; else echo None; fi ), - 'memory_pib': $( if [ ! -z ${VIASH_META_MEMORY_PIB+x} ]; then echo "int(r'${VIASH_META_MEMORY_PIB//\'/\'\"\'\"r\'}')"; else echo None; fi ) -} -dep = { - -} - -## VIASH END - -def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): - """Generate positive and negative marker genes from single-cell reference.""" - n_genes = ref_df.shape[1] - 3 # Exclude ct_idx, cell_type, atlas columns - cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) - n_cell_types = len(cell_types) - - ref_expr = ref_df.iloc[:, :n_genes].to_numpy() - gene_names = ref_df.columns[:n_genes] - ct_idx = ref_df["ct_idx"].to_numpy() - - # Generate negative markers (genes with low expression in specific cell types) - pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) - pct_10 = np.tile(pct_10, (1, n_genes)) - low_expr_true = np.zeros(pct_10.shape) - low_expr_true[ref_expr <= pct_10] = 1 - - low_expr_true_agg = np.zeros((n_cell_types, n_genes)) - for ct in range(n_cell_types): - rows = np.where(ct_idx == ct)[0] - low_expr_true_ct = low_expr_true[rows] - low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) - - overlaps = np.sum(low_expr_true_agg, 0) - too_many = np.where(overlaps > max_overlaps_neg)[0] - low_expr_true_agg[:, too_many] = 0 - df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) - - # Generate positive markers (genes with high expression in specific cell types) - pct_90 = 
np.percentile(ref_expr, 90, axis=1, keepdims=True) - pct_90 = np.tile(pct_90, (1, n_genes)) - high_expr_true = np.zeros(pct_90.shape) - high_expr_true[ref_expr >= pct_90] = 1 - - high_expr_true_agg = np.zeros((n_cell_types, n_genes)) - for ct in range(n_cell_types): - rows = np.where(ct_idx == ct)[0] - high_expr_true_ct = high_expr_true[rows] - high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) - - overlaps = np.sum(high_expr_true_agg, 0) - too_many = np.where(overlaps > max_overlaps_pos)[0] - high_expr_true_agg[:, too_many] = 0 - df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) - - return df_pos, df_neg - -def create_bidcell_config(work_dir, epochs=10, min_size=15): - """Create BIDCell configuration YAML file.""" - config = { - "data_path": str(work_dir), - "image_name": "morphology_mip_pyramidal.tiff", - "transcript_file": "transcript.csv.gz", - "pos_marker_file": "pos_marker.csv", - "neg_marker_file": "neg_marker.csv", - "scref_file": "scref.csv", - "output_path": str(work_dir), - "model_params": { - "epochs": epochs, - "min_cell_size": min_size - } - } - - config_path = work_dir / "config.yaml" - with open(config_path, 'w') as f: - yaml.dump(config, f, default_flow_style=False) - - return config_path - -def main(): - print("Starting BIDCell segmentation", flush=True) - - # Create temporary working directory - work_dir = Path(tempfile.mkdtemp()) - - try: - # Load input spatial data - print("Loading input spatial data", flush=True) - sdata = sd.read_zarr(par["input"]) - - # Validate required components - if "transcripts" not in sdata.points: - raise ValueError("Input data must contain transcripts in points layer") - - # Get available image keys for morphology - image_keys = list(sdata.images.keys()) - morphology_key = None - for key in ["morphology_mip", "morphology", "image", "dapi"]: - if key in image_keys: - morphology_key = key - break - - if morphology_key is None: - raise ValueError(f"No morphology image found. 
Available keys: {image_keys}") - - print(f"Using morphology image: {morphology_key}", flush=True) - - # Extract genes from spatial data - sdata_genes = sdata.points["transcripts"]["feature_name"].unique().compute().sort_values().tolist() - print(f"Found {len(sdata_genes)} genes in spatial data", flush=True) - - # Extract morphology image - print("Extracting morphology image", flush=True) - if hasattr(sdata.images[morphology_key], 'data'): - img_data = sdata.images[morphology_key].data - else: - img_data = sdata.images[morphology_key] - - if hasattr(img_data, 'values'): - img = img_data.values - else: - img = np.array(img_data) - - # Handle different image formats - if img.ndim == 3: - img = np.squeeze(img) - if img.ndim != 2: - raise ValueError(f"Expected 2D image, got {img.ndim}D") - - # Save morphology image - morphology_path = work_dir / "morphology_mip_pyramidal.tiff" - tifffile.imwrite(morphology_path, img.astype(np.uint16)) - - # Process single-cell reference if provided - if par["single_cell_ref"]: - print("Processing single-cell reference", flush=True) - adata = sc.read_h5ad(par["single_cell_ref"]) - - # Find shared genes - shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] - print(f"Found {len(shared_genes)} shared genes", flush=True) - - if len(shared_genes) < 10: - print("Warning: Very few shared genes found, segmentation may be poor") - - # Filter reference to shared genes - adata = adata[:, adata.var["feature_name"].isin(shared_genes)] - adata.var_names = adata.var["feature_name"].astype(str) - - # Create reference dataframe - if "normalized" in adata.layers: - expr_data = adata[:, shared_genes].layers["normalized"] - elif "X" in adata.layers: - expr_data = adata[:, shared_genes].layers["X"] - else: - expr_data = adata[:, shared_genes].X - - if hasattr(expr_data, 'toarray'): - expr_data = expr_data.toarray() - - sc_ref = pd.DataFrame( - data=expr_data, - columns=shared_genes, - index=range(adata.n_obs) - ) - - # Add cell 
type information - if "cell_type" in adata.obs: - cell_type_col = adata.obs['cell_type'].astype('category') - elif "celltype" in adata.obs: - cell_type_col = adata.obs['celltype'].astype('category') - else: - # Create dummy cell types - print("No cell type information found, using dummy types") - cell_type_col = pd.Categorical(['Type1'] * adata.n_obs) - - sc_ref["ct_idx"] = cell_type_col.cat.codes.values - sc_ref["cell_type"] = cell_type_col.values - sc_ref["atlas"] = "custom" - - # Save reference - sc_ref.to_csv(work_dir / "scref.csv", index=False) - - # Generate markers - print("Generating marker genes", flush=True) - df_pos, df_neg = generate_markers( - sc_ref, - max_overlaps_pos=par["max_overlaps_pos"], - max_overlaps_neg=par["max_overlaps_neg"] - ) - df_pos.to_csv(work_dir / "pos_marker.csv") - df_neg.to_csv(work_dir / "neg_marker.csv") - - # Process transcripts - print("Processing transcripts", flush=True) - transcripts_df = sdata.points["transcripts"].compute() - if par["single_cell_ref"]: - transcripts_df = transcripts_df[transcripts_df["feature_name"].isin(shared_genes)] - - # Ensure correct data types - for col in ['x', 'y', 'z']: - if col in transcripts_df.columns: - transcripts_df[col] = transcripts_df[col].astype(float) - transcripts_df['feature_name'] = transcripts_df['feature_name'].astype(str) - - # Save transcripts - transcripts_df.to_csv(work_dir / "transcript.csv.gz", compression='gzip', index=False) - - # Create BIDCell config - print("Creating BIDCell configuration", flush=True) - config_path = create_bidcell_config( - work_dir, - epochs=par["model_epochs"], - min_size=par["min_cell_size"] - ) - - # Run BIDCell (mock implementation for now) - print("Running BIDCell segmentation", flush=True) - - # For now, create a simple watershed-based segmentation as placeholder - from skimage import filters, segmentation, measure - from scipy import ndimage - - # Simple preprocessing and segmentation - img_blur = filters.gaussian(img.astype(float), sigma=1) 
- threshold = filters.threshold_otsu(img_blur) - binary = img_blur > threshold - - # Distance transform and watershed - distance = ndimage.distance_transform_edt(binary) - local_maxima = filters.peaks_local_maxima(distance, min_distance=par["min_cell_size"]) - markers = measure.label(local_maxima) - segmentation_mask = segmentation.watershed(-distance, markers, mask=binary) - - # Remove small objects - segmentation_mask = segmentation.clear_border(segmentation_mask) - props = measure.regionprops(segmentation_mask) - for prop in props: - if prop.area < par["min_cell_size"]: - segmentation_mask[segmentation_mask == prop.label] = 0 - - # Relabel to ensure continuous labels - segmentation_mask = measure.label(segmentation_mask > 0) - - print(f"Segmentation completed with {segmentation_mask.max()} cells", flush=True) - - # Create output SpatialData - print("Creating output spatial data", flush=True) - - # Create labels layer - labels = sd.models.Labels2DModel.parse( - segmentation_mask.astype(np.uint32), - dims=('y', 'x') - ) - - # Create minimal table for compatibility - n_cells = int(segmentation_mask.max()) - obs_df = pd.DataFrame({ - "cell_id": [f"cell_{i}" for i in range(1, n_cells + 1)], - "region": ["region_0"] * n_cells - }) - obs_df.index = obs_df.index.astype(str) - - var_df = pd.DataFrame(index=pd.Index([], dtype='object', name='feature_name')) - - table = ad.AnnData( - obs=obs_df, - var=var_df, - X=np.empty((n_cells, 0)) - ) - - # Create output SpatialData - output_sdata = sd.SpatialData( - labels={"segmentation": labels}, - tables={"table": table} - ) - - # Write output - print("Writing output", flush=True) - if os.path.exists(par["output"]): - shutil.rmtree(par["output"]) - output_sdata.write(par["output"]) - - print("BIDCell segmentation completed successfully", flush=True) - - except Exception as e: - logging.error(f"BIDCell segmentation failed: {str(e)}") - sys.exit(1) - - finally: - # Clean up temporary directory - if work_dir.exists(): - 
shutil.rmtree(work_dir) - -if __name__ == "__main__": - main() -VIASHMAIN -python -B "\$tempscript" & -wait "\$!" - -VIASHEOF - - -if [[ "$VIASH_ENGINE_TYPE" == "docker" ]]; then - # strip viash automount from file paths - - if [ ! -z "$VIASH_PAR_INPUT" ]; then - VIASH_PAR_INPUT=$(ViashDockerStripAutomount "$VIASH_PAR_INPUT") - fi - if [ ! -z "$VIASH_PAR_OUTPUT" ]; then - VIASH_PAR_OUTPUT=$(ViashDockerStripAutomount "$VIASH_PAR_OUTPUT") - fi - if [ ! -z "$VIASH_PAR_SINGLE_CELL_REF" ]; then - VIASH_PAR_SINGLE_CELL_REF=$(ViashDockerStripAutomount "$VIASH_PAR_SINGLE_CELL_REF") - fi - if [ ! -z "$VIASH_META_RESOURCES_DIR" ]; then - VIASH_META_RESOURCES_DIR=$(ViashDockerStripAutomount "$VIASH_META_RESOURCES_DIR") - fi - if [ ! -z "$VIASH_META_EXECUTABLE" ]; then - VIASH_META_EXECUTABLE=$(ViashDockerStripAutomount "$VIASH_META_EXECUTABLE") - fi - if [ ! -z "$VIASH_META_CONFIG" ]; then - VIASH_META_CONFIG=$(ViashDockerStripAutomount "$VIASH_META_CONFIG") - fi - if [ ! -z "$VIASH_META_TEMP_DIR" ]; then - VIASH_META_TEMP_DIR=$(ViashDockerStripAutomount "$VIASH_META_TEMP_DIR") - fi -fi - - -# check whether required files exist -if [ ! -z "$VIASH_PAR_OUTPUT" ] && [ ! -e "$VIASH_PAR_OUTPUT" ]; then - ViashError "Output file '$VIASH_PAR_OUTPUT' does not exist." 
- exit 1 -fi - - -exit 0 diff --git a/src/methods_segmentation/bidcell/script.py b/src/methods_segmentation/bidcell/script.py index 1429aa8ff..9c1754194 100644 --- a/src/methods_segmentation/bidcell/script.py +++ b/src/methods_segmentation/bidcell/script.py @@ -1,38 +1,33 @@ #!/usr/bin/env python3 import spatialdata as sd -import anndata as ad +import scanpy as sc import numpy as np import pandas as pd -import scanpy as sc import tifffile import cv2 import natsort -import os -import tempfile -import shutil -import yaml -import logging +import os import sys +import logging +import tempfile from pathlib import Path +from bidcell import BIDCellModel ## VIASH START par = { - "input": "resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr", - "output": "segmentation.zarr", - "single_cell_ref": None, - "max_overlaps_pos": 4, - "max_overlaps_neg": 15, - "model_epochs": 10, - "min_cell_size": 15 -} -meta = { - "name": "bidcell" + 'input': 'resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr', + 'output': 'output.zarr', + 'single_cell_ref': 'resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad', + 'max_overlaps_pos': 4, + 'max_overlaps_neg': 15, + 'model_epochs': 10, + 'min_cell_size': 15 } ## VIASH END def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): - """Generate positive and negative marker genes from single-cell reference.""" + """Generate positive and negative marker genes for cell types.""" n_genes = ref_df.shape[1] - 3 # Exclude ct_idx, cell_type, atlas columns cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) n_cell_types = len(cell_types) @@ -41,7 +36,7 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): gene_names = ref_df.columns[:n_genes] ct_idx = ref_df["ct_idx"].to_numpy() - # Generate negative markers (genes with low expression in specific cell types) + # Generate negative markers (genes with low expression) pct_10 = 
np.percentile(ref_expr, 10, axis=1, keepdims=True) pct_10 = np.tile(pct_10, (1, n_genes)) low_expr_true = np.zeros(pct_10.shape) @@ -58,7 +53,7 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): low_expr_true_agg[:, too_many] = 0 df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) - # Generate positive markers (genes with high expression in specific cell types) + # Generate positive markers (genes with high expression) pct_90 = np.percentile(ref_expr, 90, axis=1, keepdims=True) pct_90 = np.tile(pct_90, (1, n_genes)) high_expr_true = np.zeros(pct_90.shape) @@ -77,242 +72,185 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): return df_pos, df_neg -def create_bidcell_config(work_dir, epochs=10, min_size=15): - """Create BIDCell configuration YAML file.""" - config = { - "data_path": str(work_dir), - "image_name": "morphology_mip_pyramidal.tiff", - "transcript_file": "transcript.csv.gz", - "pos_marker_file": "pos_marker.csv", - "neg_marker_file": "neg_marker.csv", - "scref_file": "scref.csv", - "output_path": str(work_dir), - "model_params": { - "epochs": epochs, - "min_cell_size": min_size - } - } +def main(): + logging.basicConfig(level=logging.INFO) - config_path = work_dir / "config.yaml" - with open(config_path, 'w') as f: - yaml.dump(config, f, default_flow_style=False) + # Load spatial data + logging.info(f"Loading spatial data from {par['input']}") + sdata = sd.read_zarr(par['input']) - return config_path - -def main(): - print("Starting BIDCell segmentation", flush=True) + # Log data characteristics + logging.info(f"Loaded spatial data with components: {list(sdata)}") - # Create temporary working directory - work_dir = Path(tempfile.mkdtemp()) + # Get gene list from spatial transcripts + sdata_genes = sdata['transcripts']["feature_name"].unique().compute().sort_values().tolist() + logging.info(f"Found {len(sdata_genes)} unique genes in spatial data") - try: - # Load input spatial data - 
print("Loading input spatial data", flush=True) - sdata = sd.read_zarr(par["input"]) - - # Validate required components - if "transcripts" not in sdata.points: - raise ValueError("Input data must contain transcripts in points layer") + # Create temporary working directory + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) - # Get available image keys for morphology - image_keys = list(sdata.images.keys()) - morphology_key = None - for key in ["morphology_mip", "morphology", "image", "dapi"]: - if key in image_keys: - morphology_key = key - break + try: + # Extract DAPI image for BIDCell + logging.info("Extracting morphology image") + img = sdata["morphology_mip"]["scale0"]["image"].values + img = np.squeeze(img) + + morphology_path = temp_path / "morphology_mip_pyramidal.tiff" + with tifffile.TiffWriter(morphology_path, bigtiff=True) as tiff: + tiff.write(img, photometric="minisblack", resolution=(1, 1)) + + # Process single-cell reference data if provided + if par['single_cell_ref'] and os.path.exists(par['single_cell_ref']): + logging.info(f"Loading single-cell reference from {par['single_cell_ref']}") + adata = sc.read_h5ad(par['single_cell_ref']) - if morphology_key is None: - raise ValueError(f"No morphology image found. 
Available keys: {image_keys}") - - print(f"Using morphology image: {morphology_key}", flush=True) - - # Extract genes from spatial data - sdata_genes = sdata.points["transcripts"]["feature_name"].unique().compute().sort_values().tolist() - print(f"Found {len(sdata_genes)} genes in spatial data", flush=True) - - # Extract morphology image - print("Extracting morphology image", flush=True) - if hasattr(sdata.images[morphology_key], 'data'): - img_data = sdata.images[morphology_key].data - else: - img_data = sdata.images[morphology_key] + # Filter to shared genes + shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] + logging.info(f"Found {len(shared_genes)} shared genes between spatial and scRNA-seq data") + + if len(shared_genes) == 0: + raise ValueError("No shared genes found between spatial and single-cell reference data") + + adata = adata[:, adata.var["feature_name"].isin(shared_genes)] + adata.var_names = adata.var["feature_name"].astype(str) + + # Create reference dataframe for BIDCell + # Use normalized layer if available, otherwise X + if "normalized" in adata.layers: + expr_data = adata[:, shared_genes].layers["normalized"].toarray() + else: + expr_data = adata[:, shared_genes].X.toarray() + + sc_ref = pd.DataFrame( + data=expr_data, + columns=shared_genes, + index=range(adata.n_obs) + ) + + # Add cell type information + if 'cell_type' not in adata.obs.columns: + logging.warning("No 'cell_type' column found in reference data, using dummy cell type") + adata.obs['cell_type'] = 'Unknown' + + cell_type_col = adata.obs['cell_type'].astype('category') + sc_ref["ct_idx"] = cell_type_col.cat.codes.values + sc_ref["cell_type"] = cell_type_col.values + sc_ref["atlas"] = "custom" + + # Save reference data + scref_path = temp_path / "scref.csv" + sc_ref.to_csv(scref_path) + + # Generate marker files + logging.info("Generating positive and negative marker genes") + df_pos, df_neg = generate_markers( + sc_ref, + 
max_overlaps_pos=par['max_overlaps_pos'], + max_overlaps_neg=par['max_overlaps_neg'] + ) + + pos_marker_path = temp_path / "pos_marker.csv" + neg_marker_path = temp_path / "neg_marker.csv" + df_pos.to_csv(pos_marker_path) + df_neg.to_csv(neg_marker_path) + + # Filter transcripts to shared genes + transcript = sdata["transcripts"].compute() + transcript_filtered = transcript[transcript["feature_name"].isin(shared_genes)] + + else: + logging.warning("No single-cell reference provided, using all genes") + transcript_filtered = sdata["transcripts"].compute() + shared_genes = sdata_genes - if hasattr(img_data, 'values'): - img = img_data.values - else: - img = np.array(img_data) + # Save transcript data for BIDCell + transcript_path = temp_path / "transcript.csv.gz" + pd.DataFrame(transcript_filtered).to_csv(transcript_path, compression='gzip') - # Handle different image formats - if img.ndim == 3: - img = np.squeeze(img) - if img.ndim != 2: - raise ValueError(f"Expected 2D image, got {img.ndim}D") + # Create BIDCell configuration file + config = { + 'data_path': str(temp_path), + 'morphology_path': str(morphology_path), + 'transcript_path': str(transcript_path), + 'epochs': par['model_epochs'], + 'min_cell_size': par['min_cell_size'] + } - # Save morphology image - morphology_path = work_dir / "morphology_mip_pyramidal.tiff" - tifffile.imwrite(morphology_path, img.astype(np.uint16)) - - # Process single-cell reference if provided - if par["single_cell_ref"]: - print("Processing single-cell reference", flush=True) - adata = sc.read_h5ad(par["single_cell_ref"]) + if par['single_cell_ref'] and os.path.exists(par['single_cell_ref']): + config.update({ + 'scref_path': str(scref_path), + 'pos_marker_path': str(pos_marker_path), + 'neg_marker_path': str(neg_marker_path) + }) - # Find shared genes - shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] - print(f"Found {len(shared_genes)} shared genes", flush=True) + config_path = temp_path / 
"bidcell_config.yaml" + import yaml + with open(config_path, 'w') as f: + yaml.dump(config, f) - if len(shared_genes) < 10: - print("Warning: Very few shared genes found, segmentation may be poor") + # Run BIDCell + logging.info("Running BIDCell segmentation") + model = BIDCellModel(str(config_path)) + model.run_pipeline() - # Filter reference to shared genes - adata = adata[:, adata.var["feature_name"].isin(shared_genes)] - adata.var_names = adata.var["feature_name"].astype(str) + # Process BIDCell output + logging.info("Processing BIDCell output") + dapi_image = tifffile.imread(morphology_path) - # Create reference dataframe - if "normalized" in adata.layers: - expr_data = adata[:, shared_genes].layers["normalized"] - elif "X" in adata.layers: - expr_data = adata[:, shared_genes].layers["X"] - else: - expr_data = adata[:, shared_genes].X - - if hasattr(expr_data, 'toarray'): - expr_data = expr_data.toarray() - - sc_ref = pd.DataFrame( - data=expr_data, - columns=shared_genes, - index=range(adata.n_obs) + # Look for BIDCell output file (adjust name based on actual output) + output_files = list(temp_path.glob("*connected.tif")) + if not output_files: + output_files = list(temp_path.glob("segmentation*.tif")) + + if not output_files: + raise FileNotFoundError("BIDCell segmentation output not found") + + segmentation_mask = tifffile.imread(output_files[0]) + h_dapi, w_dapi = dapi_image.shape + + # Resize segmentation to match DAPI image + segmentation_resized = cv2.resize( + segmentation_mask.astype('float32'), + (w_dapi, h_dapi), + interpolation=cv2.INTER_NEAREST ) + segmentation_resized = segmentation_resized.astype(np.uint32) - # Add cell type information - if "cell_type" in adata.obs: - cell_type_col = adata.obs['cell_type'].astype('category') - elif "celltype" in adata.obs: - cell_type_col = adata.obs['celltype'].astype('category') - else: - # Create dummy cell types - print("No cell type information found, using dummy types") - cell_type_col = 
pd.Categorical(['Type1'] * adata.n_obs) - - sc_ref["ct_idx"] = cell_type_col.cat.codes.values - sc_ref["cell_type"] = cell_type_col.values - sc_ref["atlas"] = "custom" + # Create output SpatialData + logging.info("Creating output SpatialData") + + # Prepare images + image_with_channel = np.expand_dims(dapi_image, axis=0) + images = sd.models.Image2DModel.parse(image_with_channel, dims=('c', 'y', 'x')) - # Save reference - sc_ref.to_csv(work_dir / "scref.csv", index=False) + # Prepare labels (segmentation) + labels = sd.models.Labels2DModel.parse(segmentation_resized, dims=('y', 'x')) - # Generate markers - print("Generating marker genes", flush=True) - df_pos, df_neg = generate_markers( - sc_ref, - max_overlaps_pos=par["max_overlaps_pos"], - max_overlaps_neg=par["max_overlaps_neg"] + # Prepare points (transcripts) + transcript_df = pd.DataFrame(transcript_filtered) + transcript_df['x'] = transcript_df['x'].astype(float) + transcript_df['y'] = transcript_df['y'].astype(float) + transcript_df['z'] = transcript_df['z'].astype(float) + transcript_df['feature_name'] = transcript_df['feature_name'].astype(str) + points = sd.models.PointsModel.parse(transcript_df) + + # Create output SpatialData object + output_sdata = sd.SpatialData( + images={'morphology_mip': images}, + labels={'segmentation': labels}, + points={'transcripts': points} ) - df_pos.to_csv(work_dir / "pos_marker.csv") - df_neg.to_csv(work_dir / "neg_marker.csv") - - # Process transcripts - print("Processing transcripts", flush=True) - transcripts_df = sdata.points["transcripts"].compute() - if par["single_cell_ref"]: - transcripts_df = transcripts_df[transcripts_df["feature_name"].isin(shared_genes)] - - # Ensure correct data types - for col in ['x', 'y', 'z']: - if col in transcripts_df.columns: - transcripts_df[col] = transcripts_df[col].astype(float) - transcripts_df['feature_name'] = transcripts_df['feature_name'].astype(str) - - # Save transcripts - transcripts_df.to_csv(work_dir / 
"transcript.csv.gz", compression='gzip', index=False) - - # Create BIDCell config - print("Creating BIDCell configuration", flush=True) - config_path = create_bidcell_config( - work_dir, - epochs=par["model_epochs"], - min_size=par["min_cell_size"] - ) - - # Run BIDCell (mock implementation for now) - print("Running BIDCell segmentation", flush=True) - - # For now, create a simple watershed-based segmentation as placeholder - from skimage import filters, segmentation, measure - from scipy import ndimage - - # Simple preprocessing and segmentation - img_blur = filters.gaussian(img.astype(float), sigma=1) - threshold = filters.threshold_otsu(img_blur) - binary = img_blur > threshold - - # Distance transform and watershed - distance = ndimage.distance_transform_edt(binary) - local_maxima = filters.peaks_local_maxima(distance, min_distance=par["min_cell_size"]) - markers = measure.label(local_maxima) - segmentation_mask = segmentation.watershed(-distance, markers, mask=binary) - - # Remove small objects - segmentation_mask = segmentation.clear_border(segmentation_mask) - props = measure.regionprops(segmentation_mask) - for prop in props: - if prop.area < par["min_cell_size"]: - segmentation_mask[segmentation_mask == prop.label] = 0 - - # Relabel to ensure continuous labels - segmentation_mask = measure.label(segmentation_mask > 0) - - print(f"Segmentation completed with {segmentation_mask.max()} cells", flush=True) - - # Create output SpatialData - print("Creating output spatial data", flush=True) - - # Create labels layer - labels = sd.models.Labels2DModel.parse( - segmentation_mask.astype(np.uint32), - dims=('y', 'x') - ) - - # Create minimal table for compatibility - n_cells = int(segmentation_mask.max()) - obs_df = pd.DataFrame({ - "cell_id": [f"cell_{i}" for i in range(1, n_cells + 1)], - "region": ["region_0"] * n_cells - }) - obs_df.index = obs_df.index.astype(str) - - var_df = pd.DataFrame(index=pd.Index([], dtype='object', name='feature_name')) - - table = 
ad.AnnData( - obs=obs_df, - var=var_df, - X=np.empty((n_cells, 0)) - ) - - # Create output SpatialData - output_sdata = sd.SpatialData( - labels={"segmentation": labels}, - tables={"table": table} - ) - - # Write output - print("Writing output", flush=True) - if os.path.exists(par["output"]): - shutil.rmtree(par["output"]) - output_sdata.write(par["output"]) - - print("BIDCell segmentation completed successfully", flush=True) - - except Exception as e: - logging.error(f"BIDCell segmentation failed: {str(e)}") - sys.exit(1) - - finally: - # Clean up temporary directory - if work_dir.exists(): - shutil.rmtree(work_dir) + + # Write output + logging.info(f"Writing output to {par['output']}") + output_sdata.write(par['output'], overwrite=True) + + logging.info("BIDCell segmentation completed successfully") + + except Exception as e: + logging.error(f"BIDCell segmentation failed: {str(e)}") + sys.exit(1) if __name__ == "__main__": main() \ No newline at end of file From 09bba403d2622d8bda8fd6f2b9d41d8b100e173e Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Mon, 15 Sep 2025 15:47:59 +0200 Subject: [PATCH 3/8] bidcell input name --- .../bidcell/config.vsh.yaml | 15 ++++++------ src/methods_segmentation/bidcell/script.py | 23 +++++++++++++++---- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml index 1e7d3f1af..7bba15de2 100644 --- a/src/methods_segmentation/bidcell/config.vsh.yaml +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -11,11 +11,10 @@ references: __merge__: /src/api/comp_method_segmentation.yaml arguments: - - name: --single_cell_ref - type: file - description: "Path to single-cell reference data in H5AD format for marker gene identification" + - name: --input_scrnaseq_reference + __merge__: /src/api/file_scrnaseq_reference.yaml required: false - default: 
"resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad" + direction: input - name: --max_overlaps_pos type: integer default: 4 @@ -48,9 +47,9 @@ engines: image: openproblems/base_python:1 setup: - type: python - pypi: "numpy<2.0" + pypi: "numpy>=1.24.4,<2.0.0" - type: python - pypi: "imgaug<0.4.1" + pypi: "imgaug==0.4.0" - type: python pypi: spatialdata - type: python @@ -61,8 +60,6 @@ engines: pypi: opencv-python - type: python pypi: natsort - - type: python - pypi: bidcell - type: python pypi: dask[dataframe] - type: python @@ -75,4 +72,6 @@ engines: pypi: torch - type: python pypi: torchvision + - type: python + pypi: "bidcell" - type: native diff --git a/src/methods_segmentation/bidcell/script.py b/src/methods_segmentation/bidcell/script.py index 9c1754194..29878b504 100644 --- a/src/methods_segmentation/bidcell/script.py +++ b/src/methods_segmentation/bidcell/script.py @@ -18,7 +18,7 @@ par = { 'input': 'resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr', 'output': 'output.zarr', - 'single_cell_ref': 'resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2/dataset.h5ad', + 'single_cell_ref': None, 'max_overlaps_pos': 4, 'max_overlaps_neg': 15, 'model_epochs': 10, @@ -101,10 +101,25 @@ def main(): tiff.write(img, photometric="minisblack", resolution=(1, 1)) # Process single-cell reference data if provided - if par['single_cell_ref'] and os.path.exists(par['single_cell_ref']): + # First check if there's an scRNA-seq reference in the spatial data itself + if 'scrnaseq_reference' in sdata.tables: + logging.info("Using scRNA-seq reference from input spatial data") + adata = sdata.tables['scrnaseq_reference'] + elif par.get('single_cell_ref') and os.path.exists(par['single_cell_ref']): logging.info(f"Loading single-cell reference from {par['single_cell_ref']}") adata = sc.read_h5ad(par['single_cell_ref']) + else: + logging.info("No single-cell reference found, using scrnaseq_reference.h5ad from test data") + # Try to use the 
scrnaseq_reference.h5ad that should be in the same directory as raw_ist.zarr + input_dir = os.path.dirname(par['input']) + ref_path = os.path.join(input_dir, 'scrnaseq_reference.h5ad') + if os.path.exists(ref_path): + logging.info(f"Found scRNA-seq reference at {ref_path}") + adata = sc.read_h5ad(ref_path) + else: + adata = None + if adata is not None: # Filter to shared genes shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] logging.info(f"Found {len(shared_genes)} shared genes between spatial and scRNA-seq data") @@ -177,7 +192,7 @@ def main(): 'min_cell_size': par['min_cell_size'] } - if par['single_cell_ref'] and os.path.exists(par['single_cell_ref']): + if adata is not None: config.update({ 'scref_path': str(scref_path), 'pos_marker_path': str(pos_marker_path), @@ -253,4 +268,4 @@ def main(): sys.exit(1) if __name__ == "__main__": - main() \ No newline at end of file + main() From a41b0f533d21bb4e45ddc0f7395dbf31e4ebf596 Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Mon, 15 Sep 2025 16:21:07 +0200 Subject: [PATCH 4/8] bidcell added --- src/methods_segmentation/bidcell/config.vsh.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml index 7bba15de2..85a61e535 100644 --- a/src/methods_segmentation/bidcell/config.vsh.yaml +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -47,9 +47,17 @@ engines: image: openproblems/base_python:1 setup: - type: python - pypi: "numpy>=1.24.4,<2.0.0" - - type: python + pypi: "numpy==1.26.4" + - type: python pypi: "imgaug==0.4.0" + - type: python + pypi: "scipy<1.14" + - type: python + pypi: "scikit-learn<1.6" + - type: python + pypi: "dask==2024.11.2" + - type: python + pypi: "bidcell" - type: python pypi: spatialdata - type: python @@ -60,8 +68,6 @@ engines: pypi: opencv-python - type: python pypi: natsort - - type: python - pypi: 
dask[dataframe] - type: python pypi: spatialdata-plot - type: python @@ -72,6 +78,4 @@ engines: pypi: torch - type: python pypi: torchvision - - type: python - pypi: "bidcell" - type: native From 55828d12999ba8e6a12261c106dbe639ced0616a Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Thu, 18 Sep 2025 11:05:51 +0200 Subject: [PATCH 5/8] backup --- .../bidcell/config.vsh.yaml | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml index 85a61e535..fc2706fce 100644 --- a/src/methods_segmentation/bidcell/config.vsh.yaml +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -46,36 +46,49 @@ engines: - type: docker image: openproblems/base_python:1 setup: + - type: apt + packages: + - libgl1-mesa-dri + - libglib2.0-0 + - libsm6 + - libxext6 + - libfontconfig1 + - libxrender1 + - build-essential + - git - type: python - pypi: "numpy==1.26.4" - - type: python - pypi: "imgaug==0.4.0" + pip: --force-reinstall numpy==1.24.4 scipy==1.11.1 scikit-image==0.21.0 - type: python - pypi: "scipy<1.14" + pypi: tifffile==2023.7.18 - type: python - pypi: "scikit-learn<1.6" + pypi: natsort==8.4.0 - type: python - pypi: "dask==2024.11.2" + pypi: opencv-python-headless==4.8.0.76 - type: python - pypi: "bidcell" + pypi: imgaug==0.4.0 - type: python - pypi: spatialdata + pypi: matplotlib==3.7.2 - type: python - pypi: scanpy + pypi: pandas==2.0.3 - type: python - pypi: tifffile + pypi: scikit-learn==1.3.0 - type: python - pypi: opencv-python + pypi: tqdm==4.65.0 - type: python - pypi: natsort + pypi: pyyaml==6.0.1 - type: python - pypi: spatialdata-plot + pypi: cellpose + - type: python + pypi: bidcell - type: python - pypi: pyyaml + pypi: spatialdata - type: python - pypi: scikit-image + pypi: scanpy - type: python - pypi: torch + pypi: dask[dataframe] - type: python - pypi: torchvision + pypi: spatialdata-plot + __merge__: + - 
/src/base/setup_txsim_partial.yaml - type: native + From b9d415d99dc7b86bce76cb85a592cc6134ec49c6 Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Fri, 19 Sep 2025 13:13:58 +0200 Subject: [PATCH 6/8] first batch of changes --- .../bidcell/config.vsh.yaml | 48 +- src/methods_segmentation/bidcell/script.py | 436 ++++++++---------- .../bidcell/testdata.yaml | 56 +++ 3 files changed, 281 insertions(+), 259 deletions(-) create mode 100644 src/methods_segmentation/bidcell/testdata.yaml diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml index fc2706fce..905920b52 100644 --- a/src/methods_segmentation/bidcell/config.vsh.yaml +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -15,6 +15,10 @@ arguments: __merge__: /src/api/file_scrnaseq_reference.yaml required: false direction: input + - name: --temp + type: string + default: "./temp_bidcell/" + description: "Temporary directory for BIDCell intermediate files" - name: --max_overlaps_pos type: integer default: 4 @@ -48,36 +52,38 @@ engines: setup: - type: apt packages: - - libgl1-mesa-dri - - libglib2.0-0 - - libsm6 - - libxext6 - - libfontconfig1 - - libxrender1 - - build-essential - - git - - type: python - pip: --force-reinstall numpy==1.24.4 scipy==1.11.1 scikit-image==0.21.0 + - libgl1 + - libgl1-mesa-dri + - libglib2.0-0 + - libsm6 + - libxext6 + - libfontconfig1 + - libxrender1 + - build-essential + - git + - type: python - pypi: tifffile==2023.7.18 + pip: scikit-image - type: python - pypi: natsort==8.4.0 + pypi: tifffile - type: python - pypi: opencv-python-headless==4.8.0.76 + pypi: natsort - type: python - pypi: imgaug==0.4.0 + pypi: opencv-python-headless - type: python - pypi: matplotlib==3.7.2 + pypi: imgaug - type: python - pypi: pandas==2.0.3 + pypi: matplotlib + #- type: python + #pypi: pandas - type: python - pypi: scikit-learn==1.3.0 + pypi: scikit-learn - type: python - pypi: tqdm==4.65.0 + pypi: tqdm - type: 
python - pypi: pyyaml==6.0.1 + pypi: pyyaml - type: python - pypi: cellpose + pypi: cellpose==3.1.1.2 - type: python pypi: bidcell - type: python @@ -88,6 +94,8 @@ engines: pypi: dask[dataframe] - type: python pypi: spatialdata-plot + - type: python + pypi: numpy >=1.24.4,<2.0.0 __merge__: - /src/base/setup_txsim_partial.yaml - type: native diff --git a/src/methods_segmentation/bidcell/script.py b/src/methods_segmentation/bidcell/script.py index 29878b504..052cacd8f 100644 --- a/src/methods_segmentation/bidcell/script.py +++ b/src/methods_segmentation/bidcell/script.py @@ -1,22 +1,20 @@ -#!/usr/bin/env python3 - import spatialdata as sd -import scanpy as sc +import spatialdata_plot as pl +import matplotlib.pyplot as plt import numpy as np -import pandas as pd import tifffile import cv2 +import dask.dataframe as dd +import scanpy as sc +import pandas as pd import natsort import os -import sys -import logging -import tempfile -from pathlib import Path from bidcell import BIDCellModel ## VIASH START par = { 'input': 'resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr', + 'temp': './temp/bidcell/', 'output': 'output.zarr', 'single_cell_ref': None, 'max_overlaps_pos': 4, @@ -26,246 +24,206 @@ } ## VIASH END +# defining the function generate_markers def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): - """Generate positive and negative marker genes for cell types.""" - n_genes = ref_df.shape[1] - 3 # Exclude ct_idx, cell_type, atlas columns - cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) - n_cell_types = len(cell_types) + n_genes = ref_df.shape[1] - 3 + cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) + n_cell_types = len(cell_types) - ref_expr = ref_df.iloc[:, :n_genes].to_numpy() - gene_names = ref_df.columns[:n_genes] - ct_idx = ref_df["ct_idx"].to_numpy() + ref_expr = ref_df.iloc[:, :n_genes].to_numpy() + gene_names = ref_df.columns[:n_genes] + ct_idx = 
ref_df["ct_idx"].to_numpy() - # Generate negative markers (genes with low expression) - pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) - pct_10 = np.tile(pct_10, (1, n_genes)) - low_expr_true = np.zeros(pct_10.shape) - low_expr_true[ref_expr <= pct_10] = 1 + # Generate negative markers + pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) + pct_10 = np.tile(pct_10, (1, n_genes)) + low_expr_true = np.zeros(pct_10.shape) + low_expr_true[ref_expr <= pct_10] = 1 - low_expr_true_agg = np.zeros((n_cell_types, n_genes)) - for ct in range(n_cell_types): - rows = np.where(ct_idx == ct)[0] - low_expr_true_ct = low_expr_true[rows] - low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) + low_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + low_expr_true_ct = low_expr_true[rows] + low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) - overlaps = np.sum(low_expr_true_agg, 0) - too_many = np.where(overlaps > max_overlaps_neg)[0] - low_expr_true_agg[:, too_many] = 0 - df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) + overlaps = np.sum(low_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_neg)[0] + low_expr_true_agg[:, too_many] = 0 + df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) - # Generate positive markers (genes with high expression) - pct_90 = np.percentile(ref_expr, 90, axis=1, keepdims=True) - pct_90 = np.tile(pct_90, (1, n_genes)) - high_expr_true = np.zeros(pct_90.shape) - high_expr_true[ref_expr >= pct_90] = 1 + # Generate positive markers + pct_90 = np.percentile(ref_expr, 90, axis=1, keepdims=True) + pct_90 = np.tile(pct_90, (1, n_genes)) + high_expr_true = np.zeros(pct_90.shape) + high_expr_true[ref_expr >= pct_90] = 1 - high_expr_true_agg = np.zeros((n_cell_types, n_genes)) - for ct in range(n_cell_types): - rows = np.where(ct_idx == ct)[0] - high_expr_true_ct = high_expr_true[rows] - 
high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) + high_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + high_expr_true_ct = high_expr_true[rows] + high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) - overlaps = np.sum(high_expr_true_agg, 0) - too_many = np.where(overlaps > max_overlaps_pos)[0] - high_expr_true_agg[:, too_many] = 0 - df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) + overlaps = np.sum(high_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_pos)[0] + high_expr_true_agg[:, too_many] = 0 + df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) - return df_pos, df_neg + return df_pos, df_neg -def main(): - logging.basicConfig(level=logging.INFO) - - # Load spatial data - logging.info(f"Loading spatial data from {par['input']}") - sdata = sd.read_zarr(par['input']) - - # Log data characteristics - logging.info(f"Loaded spatial data with components: {list(sdata)}") - - # Get gene list from spatial transcripts - sdata_genes = sdata['transcripts']["feature_name"].unique().compute().sort_values().tolist() - logging.info(f"Found {len(sdata_genes)} unique genes in spatial data") +sdata = sd.read_zarr(par['input']) +sdata_genes = sdata['transcripts']["feature_name"].unique().compute().sort_values().tolist() +# Creation of the data for a yaml file - input for BIDCELL +# Extracting DAPI image from dataset.zarr +image_pyramid = [] +img = sdata["morphology_mip"]["/scale0"]["image"].values +img = np.squeeze(img) +image_pyramid.append(img) + +if not os.path.exists(par['temp']): + os.makedirs(par['temp']) + +# Save the TIFF file in the temporary directory +with tifffile.TiffWriter(f"{par['temp']}morphology_mip_pyramidal.tiff", bigtiff=True) as tiff: + for img in image_pyramid: + tiff.write(img, photometric="minisblack", resolution=(1, 1)) + +# Converting h5ad single cell reference to .csv +adata = 
sc.read_h5ad(par['input_scrnaseq_reference']) +shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] +adata = adata[:, adata.var["feature_name"].isin(shared_genes)] + +adata.var_names = adata.var["feature_name"].astype(str) +sc_ref = pd.DataFrame( + data=adata[:, shared_genes].layers["normalized"].toarray(), + columns=shared_genes, + index=range(adata.n_obs) + ) +celltypes = adata.obs['cell_type'].unique().tolist() +cell_type_col = adata.obs['cell_type'].astype('category') +sc_ref["ct_idx"] = cell_type_col.cat.codes.values +sc_ref["cell_type"] = cell_type_col.values +sc_ref["atlas"] = "custom" +sc_ref.to_csv(f"{par['temp']}scref.csv") + +# generating transcript map .csv from test data +transcript = sdata["transcripts"].compute() +transcript = pd.DataFrame(transcript) +transcript[transcript["feature_name"].isin(shared_genes)].to_csv(f"{par['temp']}transcript.csv.gz", compression='gzip') + +# generate positive and negative marker files +df_pos, df_neg = generate_markers(sc_ref, max_overlaps_pos=4, max_overlaps_neg=15) +df_pos.to_csv(f"{par['temp']}/pos_marker.csv") +df_neg.to_csv(f"{par['temp']}/neg_marker.csv") + + + +import yaml + +config = { + "cpus": 8, + "files": { + "data_dir": par['temp'], + "fp_dapi": f"{par['temp']}morphology_mip_pyramidal.tiff", + "fp_transcripts": f"{par['temp']}transcript.csv.gz", + "fp_ref": f"{par['temp']}scref.csv", + "fp_pos_markers": f"{par['temp']}pos_marker.csv", + "fp_neg_markers": f"{par['temp']}neg_marker.csv", + }, + "nuclei_fovs": { + "stitch_nuclei_fovs": False, + }, + "nuclei": { + "diameter": None, # leave as None to automatically compute + }, + "transcripts": { + "shift_to_origin": True, + "x_col": "x", + "y_col": "y", + "gene_col": "feature_name", + "transcripts_to_filter": [ + "NegControlProbe_", + "antisense_", + "NegControlCodeword_", + "BLANK_", + "Blank-", + "NegPrb", + ], + }, + "affine": { + "target_pix_um": 1.0, + "base_pix_x": 1.0, #0.2125, + "base_pix_y": 1.0, #0.2125, + "base_ts_x": 
1.0, + "base_ts_y": 1.0, + "global_shift_x": 0, + "global_shift_y": 0, + }, + "model_params": { + "name": "custom", + "patch_size": 48, + "elongated": [ + '01 IT-ET Glut', '02 NP-CT-L6b Glut', '03 MOB-DG-IMN', '04 CGE GABA', '05 MGE GABA', '06 CNU GABA', '08 MH-LH Glut', '09 TH Glut', '11 HY GABA', '12 MOB-CR Glut', '13 CNU-HYa Glut', '14 CNU-HYa GABA', '16 MB Glut', '20 MB GABA', '28 Astro-Epen', '29 Oligo', '30 OEG', '31 Vascular', '32 Immune' + ], + }, + "training_params": { + "total_epochs": 1, + "total_steps": 60, + "ne_weight": 1.0, + "os_weight": 1.0, + "cc_weight": 1.0, + "ov_weight": 1.0, + "pos_weight": 1.0, + "neg_weight": 1.0, + }, + "testing_params": { + "test_epoch": 1, + "test_step": 60, + }, + "experiment_dirs": { + "dir_id": "last", + }, +} + +# Save YAML file +with open(f"{par['temp']}testdata.yaml", "w") as f: + yaml.dump(config, f, sort_keys=False) + + + + +# # Setting up and running BIDCell +model = BIDCellModel(f"{par['temp']}testdata.yaml") +model.run_pipeline() + + # Analysis and visualisation of BIDcell output +#dapi_image = tifffile.imread("morphology_mip_pyramidal.tiff") +#segmentation_mask = tifffile.imread("epoch_10_step_60_connected.tif") +#h_dapi, w_dapi = dapi_image.shape + +#segmentation_mask_resized = cv2.resize(segmentation_mask.astype('float32'), (w_dapi, h_dapi), interpolation=cv2.INTER_NEAREST) +#segmentation_mask_resized = segmentation_mask_resized.astype(np.uint32) +#segmentation_mask_resized = segmentation_mask_resized.transpose(1, 0) +#tifffile.imwrite("bidcellresult_resized.tif", segmentation_mask_resized) + + # creating bidcelloutput.zarr +#image = tifffile.imread("morphology_mip_pyramidal.tiff") +#image_with_channel = np.expand_dims(image, axis=0) +#label_image = tifffile.imread("bidcellresult_resized.tif") +#labels = sd.models.Labels2DModel.parse(label_image, dims=('y', 'x')) - # Create temporary working directory - with tempfile.TemporaryDirectory() as temp_dir: - temp_path = Path(temp_dir) - - try: - # Extract DAPI 
image for BIDCell - logging.info("Extracting morphology image") - img = sdata["morphology_mip"]["scale0"]["image"].values - img = np.squeeze(img) - - morphology_path = temp_path / "morphology_mip_pyramidal.tiff" - with tifffile.TiffWriter(morphology_path, bigtiff=True) as tiff: - tiff.write(img, photometric="minisblack", resolution=(1, 1)) - - # Process single-cell reference data if provided - # First check if there's an scRNA-seq reference in the spatial data itself - if 'scrnaseq_reference' in sdata.tables: - logging.info("Using scRNA-seq reference from input spatial data") - adata = sdata.tables['scrnaseq_reference'] - elif par.get('single_cell_ref') and os.path.exists(par['single_cell_ref']): - logging.info(f"Loading single-cell reference from {par['single_cell_ref']}") - adata = sc.read_h5ad(par['single_cell_ref']) - else: - logging.info("No single-cell reference found, using scrnaseq_reference.h5ad from test data") - # Try to use the scrnaseq_reference.h5ad that should be in the same directory as raw_ist.zarr - input_dir = os.path.dirname(par['input']) - ref_path = os.path.join(input_dir, 'scrnaseq_reference.h5ad') - if os.path.exists(ref_path): - logging.info(f"Found scRNA-seq reference at {ref_path}") - adata = sc.read_h5ad(ref_path) - else: - adata = None - - if adata is not None: - # Filter to shared genes - shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] - logging.info(f"Found {len(shared_genes)} shared genes between spatial and scRNA-seq data") - - if len(shared_genes) == 0: - raise ValueError("No shared genes found between spatial and single-cell reference data") - - adata = adata[:, adata.var["feature_name"].isin(shared_genes)] - adata.var_names = adata.var["feature_name"].astype(str) - - # Create reference dataframe for BIDCell - # Use normalized layer if available, otherwise X - if "normalized" in adata.layers: - expr_data = adata[:, shared_genes].layers["normalized"].toarray() - else: - expr_data = adata[:, 
shared_genes].X.toarray() - - sc_ref = pd.DataFrame( - data=expr_data, - columns=shared_genes, - index=range(adata.n_obs) - ) - - # Add cell type information - if 'cell_type' not in adata.obs.columns: - logging.warning("No 'cell_type' column found in reference data, using dummy cell type") - adata.obs['cell_type'] = 'Unknown' - - cell_type_col = adata.obs['cell_type'].astype('category') - sc_ref["ct_idx"] = cell_type_col.cat.codes.values - sc_ref["cell_type"] = cell_type_col.values - sc_ref["atlas"] = "custom" - - # Save reference data - scref_path = temp_path / "scref.csv" - sc_ref.to_csv(scref_path) - - # Generate marker files - logging.info("Generating positive and negative marker genes") - df_pos, df_neg = generate_markers( - sc_ref, - max_overlaps_pos=par['max_overlaps_pos'], - max_overlaps_neg=par['max_overlaps_neg'] - ) - - pos_marker_path = temp_path / "pos_marker.csv" - neg_marker_path = temp_path / "neg_marker.csv" - df_pos.to_csv(pos_marker_path) - df_neg.to_csv(neg_marker_path) - - # Filter transcripts to shared genes - transcript = sdata["transcripts"].compute() - transcript_filtered = transcript[transcript["feature_name"].isin(shared_genes)] - - else: - logging.warning("No single-cell reference provided, using all genes") - transcript_filtered = sdata["transcripts"].compute() - shared_genes = sdata_genes - - # Save transcript data for BIDCell - transcript_path = temp_path / "transcript.csv.gz" - pd.DataFrame(transcript_filtered).to_csv(transcript_path, compression='gzip') - - # Create BIDCell configuration file - config = { - 'data_path': str(temp_path), - 'morphology_path': str(morphology_path), - 'transcript_path': str(transcript_path), - 'epochs': par['model_epochs'], - 'min_cell_size': par['min_cell_size'] - } - - if adata is not None: - config.update({ - 'scref_path': str(scref_path), - 'pos_marker_path': str(pos_marker_path), - 'neg_marker_path': str(neg_marker_path) - }) - - config_path = temp_path / "bidcell_config.yaml" - import yaml - with 
open(config_path, 'w') as f: - yaml.dump(config, f) - - # Run BIDCell - logging.info("Running BIDCell segmentation") - model = BIDCellModel(str(config_path)) - model.run_pipeline() - - # Process BIDCell output - logging.info("Processing BIDCell output") - dapi_image = tifffile.imread(morphology_path) - - # Look for BIDCell output file (adjust name based on actual output) - output_files = list(temp_path.glob("*connected.tif")) - if not output_files: - output_files = list(temp_path.glob("segmentation*.tif")) - - if not output_files: - raise FileNotFoundError("BIDCell segmentation output not found") - - segmentation_mask = tifffile.imread(output_files[0]) - h_dapi, w_dapi = dapi_image.shape - - # Resize segmentation to match DAPI image - segmentation_resized = cv2.resize( - segmentation_mask.astype('float32'), - (w_dapi, h_dapi), - interpolation=cv2.INTER_NEAREST - ) - segmentation_resized = segmentation_resized.astype(np.uint32) - - # Create output SpatialData - logging.info("Creating output SpatialData") - - # Prepare images - image_with_channel = np.expand_dims(dapi_image, axis=0) - images = sd.models.Image2DModel.parse(image_with_channel, dims=('c', 'y', 'x')) - - # Prepare labels (segmentation) - labels = sd.models.Labels2DModel.parse(segmentation_resized, dims=('y', 'x')) - - # Prepare points (transcripts) - transcript_df = pd.DataFrame(transcript_filtered) - transcript_df['x'] = transcript_df['x'].astype(float) - transcript_df['y'] = transcript_df['y'].astype(float) - transcript_df['z'] = transcript_df['z'].astype(float) - transcript_df['feature_name'] = transcript_df['feature_name'].astype(str) - points = sd.models.PointsModel.parse(transcript_df) - - # Create output SpatialData object - output_sdata = sd.SpatialData( - images={'morphology_mip': images}, - labels={'segmentation': labels}, - points={'transcripts': points} - ) - - # Write output - logging.info(f"Writing output to {par['output']}") - output_sdata.write(par['output'], overwrite=True) - - 
logging.info("BIDCell segmentation completed successfully") - - except Exception as e: - logging.error(f"BIDCell segmentation failed: {str(e)}") - sys.exit(1) +#transcript_processed = pd.read_csv("data/transcript.csv.gz") +#transcript_processed['x'] = transcript_processed['x'].astype(float) +#transcript_processed['y'] = transcript_processed['y'].astype(float) +#transcript_processed['z'] = transcript_processed['z'].astype(float) +#transcript_processed['feature_name'] = transcript_processed['feature_name'].astype(str) + +#images = sd.models.Image2DModel.parse(image_with_channel, dims=('c', 'x', 'y')) +#labels = sd.models.Labels2DModel.parse(label_image, dims=('y', 'x')) +#points = sd.models.PointsModel.parse(transcript_processed) -if __name__ == "__main__": - main() +outputsdata = sd.SpatialData() +# images={'DAPI': images}, +# labels={'segmentation_mask_labels': labels}, +# points={'transcripts': points} +# ) +outputsdata.write(par['output'], overwrite=True) \ No newline at end of file diff --git a/src/methods_segmentation/bidcell/testdata.yaml b/src/methods_segmentation/bidcell/testdata.yaml new file mode 100644 index 000000000..c9c91e248 --- /dev/null +++ b/src/methods_segmentation/bidcell/testdata.yaml @@ -0,0 +1,56 @@ +# src/methods_segmentation/bidcell/testdata.yaml + +cpus: 8 # number of CPUs for multiprocessing +files: + data_dir: ./temp/bidcell # data directory for processed/output data + fp_dapi: ./temp/bidcell/morphology_mip_pyramidal.tiff # path of DAPI image + fp_transcripts: ./temp/bidcell/transcript.csv.gz # path of transcripts file + fp_ref: ./temp/bidcell/scref.csv # file path of reference data + fp_pos_markers: ./temp/bidcell/pos_marker.csv # file path of positive markers + fp_neg_markers: ./temp/bidcell/neg_marker.csv # file path of negative markers +nuclei_fovs: + stitch_nuclei_fovs: False # set True to stitch separate FOVs of DAPI together in 1 image +nuclei: + diameter: # estimated diameter of nuclei for Cellpose; leave as None to automatically
compute +transcripts: + shift_to_origin: True # shift to origin + x_col: x_location # name of x location column in transcripts file + y_col: y_location # name of y location column in transcripts file + gene_col: feature_name # name of genes column in transcripts file + transcripts_to_filter: + - NegControlProbe_ + - antisense_ + - NegControlCodeword_ + - BLANK_ + - Blank- + - NegPrb +affine: + target_pix_um: 1.0 # microns per pixel for segmentation + base_pix_x: 0.2125 # microns per pixel in width + base_pix_y: 0.2125 # microns per pixel in height + base_ts_x: 1.0 # convert transcripts locations to target pixels width + base_ts_y: 1.0 # convert transcripts locations to target pixels height + global_shift_x: 0 # adjustment aligning transcripts to DAPI in width + global_shift_y: 0 # adjustment aligning transcripts to DAPI in height +model_params: + name: custom # segmentation model to use: custom or set encoder name from segmentation_models_pytorch + patch_size: 48 # size of transcriptomic image patches for input to DL model + elongated: # list of elongated cell types in the single-cell reference + - Endothelial + - Fibroblasts + - Myofibroblasts + - SMC +training_params: + total_epochs: 1 # number of training epochs + total_steps: 60 # number of training steps + ne_weight: 1.0 # nuclei encapsulation loss weight + os_weight: 1.0 # oversegmentation loss weight + cc_weight: 1.0 # cell-calling loss weight + ov_weight: 1.0 # overlap loss weight + pos_weight: 1.0 # positive marker loss weight + neg_weight: 1.0 # negative marker loss weight +testing_params: + test_epoch: 1 # epoch to test + test_step: 60 # step number to test +experiment_dirs: + dir_id: last # specify timestamp of output dir or leave blank to use latest dir \ No newline at end of file From 6621182daa43fc22e83867d0eab13596d773c8f5 Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: Thu, 25 Sep 2025 18:03:55 +0200 Subject: [PATCH 7/8] 2nd batch of changes --- 
.../bidcell/config.vsh.yaml | 6 +- src/methods_segmentation/bidcell/script.py | 162 ++++++++++-------- 2 files changed, 98 insertions(+), 70 deletions(-) diff --git a/src/methods_segmentation/bidcell/config.vsh.yaml b/src/methods_segmentation/bidcell/config.vsh.yaml index 905920b52..637b256eb 100644 --- a/src/methods_segmentation/bidcell/config.vsh.yaml +++ b/src/methods_segmentation/bidcell/config.vsh.yaml @@ -74,8 +74,8 @@ engines: pypi: imgaug - type: python pypi: matplotlib - #- type: python - #pypi: pandas + - type: python + pypi: pandas - type: python pypi: scikit-learn - type: python @@ -83,7 +83,7 @@ engines: - type: python pypi: pyyaml - type: python - pypi: cellpose==3.1.1.2 + pypi: cellpose==3.1.1.2 - type: python pypi: bidcell - type: python diff --git a/src/methods_segmentation/bidcell/script.py b/src/methods_segmentation/bidcell/script.py index 052cacd8f..884626970 100644 --- a/src/methods_segmentation/bidcell/script.py +++ b/src/methods_segmentation/bidcell/script.py @@ -13,7 +13,7 @@ ## VIASH START par = { - 'input': 'resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr', + 'input': "../task_ist_preprocessing/resources_test/common/2023_10x_mouse_brain_xenium/dataset.zarr", 'temp': './temp/bidcell/', 'output': 'output.zarr', 'single_cell_ref': None, @@ -26,78 +26,98 @@ # defining the function generate_markers def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): - n_genes = ref_df.shape[1] - 3 - cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) - n_cell_types = len(cell_types) - - ref_expr = ref_df.iloc[:, :n_genes].to_numpy() - gene_names = ref_df.columns[:n_genes] - ct_idx = ref_df["ct_idx"].to_numpy() - - # Generate negative markers - pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) - pct_10 = np.tile(pct_10, (1, n_genes)) - low_expr_true = np.zeros(pct_10.shape) - low_expr_true[ref_expr <= pct_10] = 1 - - low_expr_true_agg = np.zeros((n_cell_types, n_genes)) - for ct in 
range(n_cell_types): - rows = np.where(ct_idx == ct)[0] - low_expr_true_ct = low_expr_true[rows] - low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) - - overlaps = np.sum(low_expr_true_agg, 0) - too_many = np.where(overlaps > max_overlaps_neg)[0] - low_expr_true_agg[:, too_many] = 0 - df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) - - # Generate positive markers - pct_90 = np.percentile(ref_expr, 90, axis=1, keepdims=True) - pct_90 = np.tile(pct_90, (1, n_genes)) - high_expr_true = np.zeros(pct_90.shape) - high_expr_true[ref_expr >= pct_90] = 1 - - high_expr_true_agg = np.zeros((n_cell_types, n_genes)) - for ct in range(n_cell_types): - rows = np.where(ct_idx == ct)[0] - high_expr_true_ct = high_expr_true[rows] - high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) - - overlaps = np.sum(high_expr_true_agg, 0) - too_many = np.where(overlaps > max_overlaps_pos)[0] - high_expr_true_agg[:, too_many] = 0 - df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) - - return df_pos, df_neg + """ + Generate positive and negative marker dataframes from reference data. 
+ + Args: + ref_df (pd.DataFrame): Reference dataframe with gene expression data and cell type info + max_overlaps_pos (int): Maximum number of cell types that can share a positive marker + max_overlaps_neg (int): Maximum number of cell types that can share a negative marker + + Returns: + tuple: (df_pos, df_neg) - DataFrames containing positive and negative markers + """ + + n_genes = ref_df.shape[1] - 3 + cell_types = natsort.natsorted(list(set(ref_df["cell_type"].tolist()))) + n_cell_types = len(cell_types) + + ref_expr = ref_df.iloc[:, :n_genes].to_numpy() + gene_names = ref_df.columns[:n_genes] + ct_idx = ref_df["ct_idx"].to_numpy() + + # Generate negative markers + pct_10 = np.percentile(ref_expr, 10, axis=1, keepdims=True) + pct_10 = np.tile(pct_10, (1, n_genes)) + low_expr_true = np.zeros(pct_10.shape) + low_expr_true[ref_expr <= pct_10] = 1 + + low_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + low_expr_true_ct = low_expr_true[rows] + low_expr_true_agg[ct, :] = np.prod(low_expr_true_ct, axis=0) + + overlaps = np.sum(low_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_neg)[0] + low_expr_true_agg[:, too_many] = 0 + df_neg = pd.DataFrame(low_expr_true_agg, index=cell_types, columns=gene_names) + + # Generate positive markers + pct_90 = np.percentile(ref_expr, 90, axis=1, keepdims=True) + pct_90 = np.tile(pct_90, (1, n_genes)) + high_expr_true = np.zeros(pct_90.shape) + high_expr_true[ref_expr >= pct_90] = 1 + + high_expr_true_agg = np.zeros((n_cell_types, n_genes)) + for ct in range(n_cell_types): + rows = np.where(ct_idx == ct)[0] + high_expr_true_ct = high_expr_true[rows] + high_expr_true_agg[ct, :] = np.prod(high_expr_true_ct, axis=0) + + overlaps = np.sum(high_expr_true_agg, 0) + too_many = np.where(overlaps > max_overlaps_pos)[0] + high_expr_true_agg[:, too_many] = 0 + df_pos = pd.DataFrame(high_expr_true_agg, index=cell_types, columns=gene_names) + + return df_pos, 
df_neg + +if not os.path.exists(par['temp']): + os.makedirs(par['temp']) + sdata = sd.read_zarr(par['input']) sdata_genes = sdata['transcripts']["feature_name"].unique().compute().sort_values().tolist() -# Creation of the data for a yaml file - input for BIDCELL -# Extracting DAPI image from dataset.zarr + + image_pyramid = [] -img = sdata["morphology_mip"]["/scale0"]["image"].values -img = np.squeeze(img) +img = sdata["morphology_mip"]["/scale0"]["image"].values # Convert dask array to numpy +img = np.squeeze(img) # Remove singleton channel dimension (c:1) image_pyramid.append(img) -if not os.path.exists(par['temp']): - os.makedirs(par['temp']) + # Save the TIFF file in the temporary directory with tifffile.TiffWriter(f"{par['temp']}morphology_mip_pyramidal.tiff", bigtiff=True) as tiff: for img in image_pyramid: tiff.write(img, photometric="minisblack", resolution=(1, 1)) -# Converting h5ad single cell reference to .csv + + adata = sc.read_h5ad(par['input_scrnaseq_reference']) shared_genes = [g for g in sdata_genes if g in adata.var["feature_name"].values] adata = adata[:, adata.var["feature_name"].isin(shared_genes)] - adata.var_names = adata.var["feature_name"].astype(str) + + +# Make scref.csv sc_ref = pd.DataFrame( - data=adata[:, shared_genes].layers["normalized"].toarray(), - columns=shared_genes, - index=range(adata.n_obs) - ) + data=adata.layers["normalized"].toarray(), + columns=shared_genes, + index=range(adata.n_obs) +) + + celltypes = adata.obs['cell_type'].unique().tolist() cell_type_col = adata.obs['cell_type'].astype('category') sc_ref["ct_idx"] = cell_type_col.cat.codes.values @@ -105,10 +125,15 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): sc_ref["atlas"] = "custom" sc_ref.to_csv(f"{par['temp']}scref.csv") -# generating transcript map .csv from test data + transcript = sdata["transcripts"].compute() transcript = pd.DataFrame(transcript) 
-transcript[transcript["feature_name"].isin(shared_genes)].to_csv(f"{par['temp']}transcript.csv.gz", compression='gzip') +transcript[transcript["feature_name"].isin(shared_genes)].to_csv( + f"{par['temp']}transcript.csv.gz", compression="gzip" +) + + + # generate positive and negative marker files df_pos, df_neg = generate_markers(sc_ref, max_overlaps_pos=4, max_overlaps_neg=15) @@ -116,7 +141,6 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): df_neg.to_csv(f"{par['temp']}/neg_marker.csv") - import yaml config = { @@ -146,24 +170,23 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): "NegControlCodeword_", "BLANK_", "Blank-", - "NegPrb", + "NegPrb" ], + }, "affine": { "target_pix_um": 1.0, - "base_pix_x": 1.0, #0.2125, - "base_pix_y": 1.0, #0.2125, - "base_ts_x": 1.0, - "base_ts_y": 1.0, + "base_pix_x": 0.2125, + "base_pix_y": 0.2125, + "base_ts_x": 0.1, + "base_ts_y": 0.1, "global_shift_x": 0, "global_shift_y": 0, }, "model_params": { "name": "custom", "patch_size": 48, - "elongated": [ - '01 IT-ET Glut', '02 NP-CT-L6b Glut', '03 MOB-DG-IMN', '04 CGE GABA', '05 MGE GABA', '06 CNU GABA', '08 MH-LH Glut', '09 TH Glut', '11 HY GABA', '12 MOB-CR Glut', '13 CNU-HYa Glut', '14 CNU-HYa GABA', '16 MB Glut', '20 MB GABA', '28 Astro-Epen', '29 Oligo', '30 OEG', '31 Vascular', '32 Immune' - ], + "elongated": list(sc_ref["cell_type"]), }, "training_params": { "total_epochs": 1, @@ -191,10 +214,15 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): +#for i in sc_ref.columns: +# print(i, flush = True) # # Setting up and running BIDCell model = BIDCellModel(f"{par['temp']}testdata.yaml") model.run_pipeline() + + + # Analysis and visualisation of BIDcell output #dapi_image = tifffile.imread("morphology_mip_pyramidal.tiff") #segmentation_mask = tifffile.imread("epoch_10_step_60_connected.tif") From 4b6f566f1048f5bec5d59eaaa6877ef42d81837a Mon Sep 17 00:00:00 2001 From: Kraftfahrzeughaftpflichtversicherung Date: 
Thu, 25 Sep 2025 18:33:52 +0200 Subject: [PATCH 8/8] 3rd batch of changes --- src/methods_segmentation/bidcell/script.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/methods_segmentation/bidcell/script.py b/src/methods_segmentation/bidcell/script.py index 884626970..2bff7c0a5 100644 --- a/src/methods_segmentation/bidcell/script.py +++ b/src/methods_segmentation/bidcell/script.py @@ -13,7 +13,8 @@ ## VIASH START par = { - 'input': "../task_ist_preprocessing/resources_test/common/2023_10x_mouse_brain_xenium/dataset.zarr", + 'input': "../task_ist_preprocessing/resources_test/common/2023_10x_mouse_brain_xenium_rep1/dataset.zarr", + 'input_scrnaseq_reference': '../task_ist_preprocessing/resources_test/common/2023_yao_mouse_brain_scrnaseq_10xv2', 'temp': './temp/bidcell/', 'output': 'output.zarr', 'single_cell_ref': None, @@ -186,7 +187,7 @@ def generate_markers(ref_df, max_overlaps_pos=4, max_overlaps_neg=15): "model_params": { "name": "custom", "patch_size": 48, - "elongated": list(sc_ref["cell_type"]), + "elongated": [], #list(sc_ref["cell_type"]), }, "training_params": { "total_epochs": 1,