diff --git a/README.md b/README.md index 21d1e372a..eee10a646 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,8 @@ flowchart TB Chromatin accessibility data -Example file: `resources_test/grn_benchmark/inference_datasets//op_atac.h5ad` +Example file: +`resources_test/grn_benchmark/inference_data//op_atac.h5ad` Format: @@ -120,7 +121,7 @@ Arguments: File indicating the inferred GRN. -Example file: `resources/grn_models/op/collectri.h5ad` +Example file: `resources_test/grn_models/op/collectri.h5ad` Format: @@ -139,7 +140,7 @@ Data structure: |:---|:---|:---| | `uns["dataset_id"]` | `string` | A unique identifier for the dataset. | | `uns["method_id"]` | `string` | A unique identifier for the inference method. | -| `uns["prediction"]` | `DataFrame` | Inferred GRNs in the format of source, target, weight. | +| `uns["prediction"]` | `object` | Inferred GRNs in the format of source, target, weight. | @@ -245,7 +246,8 @@ Data structure: Perturbation dataset for benchmarking. -Example file: `resources_test/grn_benchmark/evaluation_data//op.h5ad` +Example file: +`resources_test/grn_benchmark/evaluation_data/op_bulk.h5ad` Format: @@ -275,7 +277,8 @@ Data structure: RNA expression data. 
-Example file: `resources_test/grn_benchmark/inference_datasets//op_rna.h5ad` +Example file: +`resources_test/grn_benchmark/inference_data/op_rna.h5ad` Format: diff --git a/_viash.yaml b/_viash.yaml index 11c4a0779..a2016b10c 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -59,7 +59,7 @@ info: ```bash viash run src/control_methods/pearson_corr/config.vsh.yaml -- \ - --rna resources/grn_benchmark/inference_datasets/norman_rna.h5ad --prediction output/net.h5ad + --rna resources/grn_benchmark/inference_data/norman_rna.h5ad --prediction output/net.h5ad ``` ## Evaluate a GRN diff --git a/runs.ipynb b/runs.ipynb index 63654579d..2f2d09bbd 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -14,40 +14,73 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/adamson_bulk.h5ad to resources/grn_benchmark/evaluation_datasets/adamson_bulk.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/norman_bulk.h5ad to resources/grn_benchmark/evaluation_datasets/norman_bulk.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/replogle_bulk.h5ad to resources/grn_benchmark/evaluation_datasets/replogle_bulk.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/nakatake_bulk.h5ad to resources/grn_benchmark/evaluation_datasets/nakatake_bulk.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/adamson_rna.h5ad to resources/grn_benchmark/inference_datasets/adamson_rna.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/op_bulk.h5ad to resources/grn_benchmark/evaluation_datasets/op_bulk.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/nakatake_rna.h5ad to 
resources/grn_benchmark/inference_datasets/nakatake_rna.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/adamson_sc.h5ad to resources/grn_benchmark/evaluation_datasets/adamson_sc.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_atac.rds to resources/grn_benchmark/inference_datasets/op_atac.rds\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/norman_rna.h5ad to resources/grn_benchmark/inference_datasets/norman_rna.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_rna.rds to resources/grn_benchmark/inference_datasets/op_rna.rds\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/norman_sc.h5ad to resources/grn_benchmark/evaluation_datasets/norman_sc.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/op/annot_peak_database.csv to resources/grn_benchmark/prior/op/annot_peak_database.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/op/cell_topic.csv to resources/grn_benchmark/prior/op/cell_topic.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peaks.bed to resources/grn_benchmark/prior/op/peaks.bed\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peak_annotation.csv to resources/grn_benchmark/prior/op/peak_annotation.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peaks.txt to resources/grn_benchmark/prior/op/peaks.txt\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_adamson.json to resources/grn_benchmark/prior/regulators_consensus_adamson.json\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_nakatake.json to resources/grn_benchmark/prior/regulators_consensus_nakatake.json\n", + "download: 
s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_norman.json to resources/grn_benchmark/prior/regulators_consensus_norman.json\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_op.json to resources/grn_benchmark/prior/regulators_consensus_op.json\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_replogle2.json to resources/grn_benchmark/prior/regulators_consensus_replogle2.json\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/tf_all.csv to resources/grn_benchmark/prior/tf_all.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/skeleton.csv to resources/grn_benchmark/prior/skeleton.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_consensus_adamson.csv to resources/grn_benchmark/prior/ws_consensus_adamson.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_consensus_norman.csv to resources/grn_benchmark/prior/ws_consensus_norman.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_distance_background_adamson.csv to resources/grn_benchmark/prior/ws_distance_background_adamson.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_distance_background_norman.csv to resources/grn_benchmark/prior/ws_distance_background_norman.csv\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_rna.h5ad to resources/grn_benchmark/inference_datasets/op_rna.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/replogle_rna.h5ad to resources/grn_benchmark/inference_datasets/replogle_rna.h5ad\n", + "download: s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_atac.h5ad to resources/grn_benchmark/inference_datasets/op_atac.h5ad\n", + "download: 
s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/replogle_sc.h5ad to resources/grn_benchmark/evaluation_datasets/replogle_sc.h5ad\n" + ] + } + ], + "source": [ + "!aws s3 sync s3://openproblems-data/resources/grn/grn_benchmark resources/grn_benchmark --delete\n", + "!aws s3 sync s3://openproblems-data/resources/grn/grn_models resources/grn_models --delete" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "upload: resources/grn_benchmark/inference_datasets/op_rna.rds to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_rna.rds\n", - "upload: resources/grn_benchmark/inference_datasets/op_atac.rds to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_atac.rds\n", - "upload: resources/grn_benchmark/prior/op/annot_peak_database.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/annot_peak_database.csv\n", - "upload: resources/grn_benchmark/prior/op/cell_topic.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/cell_topic.csv\n", - "upload: resources/grn_benchmark/prior/op/peaks.bed to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peaks.bed\n", - "upload: resources/grn_benchmark/prior/op/peak_annotation.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peak_annotation.csv\n", - "upload: resources/grn_benchmark/prior/op/peaks.txt to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peaks.txt\n", - "upload: resources/grn_benchmark/prior/regulators_consensus_adamson.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_adamson.json\n", - "upload: resources/grn_benchmark/prior/regulators_consensus_nakatake.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_nakatake.json\n", - "upload: resources/grn_benchmark/prior/regulators_consensus_norman.json to 
s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_norman.json\n", - "upload: resources/grn_benchmark/prior/regulators_consensus_op.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_op.json\n", - "upload: resources/grn_benchmark/prior/regulators_consensus_replogle2.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_replogle2.json\n", - "upload: resources/grn_benchmark/prior/tf_all.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/tf_all.csv\n", - "upload: resources/grn_benchmark/inference_datasets/replogle_rna.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/replogle_rna.h5ad\n", - "upload: resources/grn_benchmark/prior/ws_consensus_adamson.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_consensus_adamson.csv\n", - "upload: resources/grn_benchmark/prior/ws_consensus_norman.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_consensus_norman.csv\n", - "upload: resources/grn_benchmark/prior/ws_distance_background_adamson.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_distance_background_adamson.csv\n", - "upload: resources/grn_benchmark/prior/ws_distance_background_norman.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_distance_background_norman.csv\n", - "upload: resources/grn_benchmark/prior/skeleton.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/skeleton.csv\n", - "upload: resources/grn_benchmark/inference_datasets/op_rna.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_rna.h5ad\n", - "upload: resources/grn_benchmark/inference_datasets/op_atac.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_atac.h5ad\n", - "upload: resources/grn_benchmark/evaluation_datasets/replogle_sc.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/replogle_sc.h5ad\n" + 
"delete: s3://openproblems-data/resources_test/grn/grn_benchmark/evaluation_datasets/op_perturbation.h5ad\n", + "delete: s3://openproblems-data/resources_test/grn/grn_benchmark/inference_datasets/op_atac.h5ad\n", + "delete: s3://openproblems-data/resources_test/grn/grn_benchmark/inference_datasets/op_rna.h5ad\n", + "upload: resources_test/grn_benchmark/inference_data/op_atac.h5ad to s3://openproblems-data/resources_test/grn/grn_benchmark/inference_data/op_atac.h5ad\n", + "upload: resources_test/grn_benchmark/inference_data/op_rna.h5ad to s3://openproblems-data/resources_test/grn/grn_benchmark/inference_data/op_rna.h5ad\n", + "upload: resources_test/grn_benchmark/evaluation_data/op_bulk.h5ad to s3://openproblems-data/resources_test/grn/grn_benchmark/evaluation_data/op_bulk.h5ad\n" ] } ], "source": [ - "!aws s3 sync resources/grn_benchmark s3://openproblems-data/resources/grn/grn_benchmark --delete" + "!aws s3 sync resources_test s3://openproblems-data/resources_test/grn/ --delete" ] }, { @@ -62,7 +95,7 @@ "# !aws s3 sync resources/results/ s3://openproblems-data/resources/grn/results --delete\n", "# !aws s3 sync resources/scores/ s3://openproblems-data/resources/grn/scores --delete\n", "# !aws s3 sync resources/grn_benchmark/evaluation_data// s3://openproblems-data/resources/grn/evaluation_data/ --delete\n", - "# !aws s3 sync resources/grn_benchmark/inference_datasets/ s3://openproblems-data/resources/grn/inference_datasets/ --delete" + "# !aws s3 sync resources/grn_benchmark/inference_data/ s3://openproblems-data/resources/grn/inference_data/ --delete" ] }, { @@ -199,7 +232,7 @@ "!aws s3 sync s3://openproblems-data/resources/grn/ resources/ --delete\n", "# aws s3 sync s3://openproblems-data/resources/grn/results resources/results/ --delete\n", "# aws s3 sync s3://openproblems-data/resources/grn/grn_models resources/grn_models/\n", - "# aws s3 sync s3://openproblems-data/resources/grn/inference_datasets/ resources/grn_benchmark/inference_datasets/\n", + "# aws s3 
sync s3://openproblems-data/resources/grn/inference_data/ resources/grn_benchmark/inference_data/\n", "# aws s3 sync s3://openproblems-data/resources/grn/evaluation_data/ resources/grn_benchmark/evaluation_data//" ] }, @@ -210,7 +243,7 @@ "outputs": [], "source": [ "# !aws s3 sync resources_test/ s3://openproblems-data/resources_test/grn/ --delete\n", - "# !aws s3 sync resources/grn_benchmark/inference_datasets/ s3://openproblems-data/resources/grn/inference_datasets/ --delete" + "# !aws s3 sync resources/grn_benchmark/inference_data/ s3://openproblems-data/resources/grn/inference_data/ --delete" ] }, { @@ -222,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -257,14 +290,26 @@ " 'scores_dir': 'resources/scores'\n", "}\n", "\n", - "datasets = ['op', 'replogle', 'nakatake', 'norman', 'adamson']" + "datasets = ['op', 'replogle2', 'nakatake', 'norman', 'adamson']" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'datasets' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dataset \u001b[38;5;129;01min\u001b[39;00m \u001b[43mdatasets\u001b[49m:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m method \u001b[38;5;129;01min\u001b[39;00m par[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmethods\u001b[39m\u001b[38;5;124m'\u001b[39m]:\n\u001b[1;32m 3\u001b[0m file_name \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/grn_models/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdataset\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m'\u001b[39m\n", + "\u001b[0;31mNameError\u001b[0m: name 'datasets' is not defined" + ] + } + ], "source": [ "for dataset in datasets:\n", " for method in par['methods']:\n", @@ -278,46 +323,33 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jnourisa/miniconda3/envs/py10/lib/python3.10/site-packages/anndata/_core/aligned_df.py:68: ImplicitModificationWarning: Transforming to str index.\n", - " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n", - "/home/jnourisa/miniconda3/envs/py10/lib/python3.10/site-packages/anndata/_core/aligned_df.py:68: ImplicitModificationWarning: Transforming to str index.\n", - " warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n" - ] - } - ], + "outputs": [], "source": [ "import anndata as ad\n", - "adata = ad.read_h5ad('output/score_op_scprint.h5ad')" + "adata = ad.read_h5ad('K562_gwps_raw_singlecell.h5ad', backed='r')" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{'dataset_id': 'op',\n", - " 'method_id': 'reg2-scprint',\n", - " 'metric_ids': array(['reg2-theta-0.0', 'reg2-theta-0.5', 'reg2-theta-1.0'], dtype=object),\n", - " 'metric_values': array([0.23660427, 0.28158203, 0.31389769])}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'adata' is not defined", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43madata\u001b[49m\u001b[38;5;241m.\u001b[39mobs\n", + "\u001b[0;31mNameError\u001b[0m: name 'adata' is not defined" + ] } ], "source": [ - "adata.uns" + "adata.obs" ] }, { @@ -1069,7 +1101,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### replogle" + "### replogle2" ] }, { @@ -1287,7 +1319,7 @@ } ], "source": [ - "df_scores = pd.read_csv(f\"resources/scores/replogle/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n", + "df_scores = pd.read_csv(f\"resources/scores/replogle2/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n", "# df_scores[df_scores<0] = 0\n", "\n", "df_scores_f = df_scores[['static-theta-0.0', 'static-theta-0.5', 'static-theta-1.0']]\n", @@ -3831,7 +3863,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "py10", "language": "python", "name": "python3" }, @@ -3845,7 +3877,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/scripts/add_a_method.sh b/scripts/add_a_method.sh index e697fbf71..209e1e9a6 100644 --- a/scripts/add_a_method.sh +++ b/scripts/add_a_method.sh @@ -27,7 +27,7 @@ viash run src/methods/$method_id/config.vsh.yaml -- \ # run the inference using the method for op dataset using only RNA data. Add more aurguments if needed. 
viash run src/methods/$method_id/config.vsh.yaml -- \ - --rna "resources/grn_benchmark/inference_datasets/op_rna.h5ad" \ + --rna "resources/grn_benchmark/inference_data/op_rna.h5ad" \ --prediction "output/prediction.h5ad" # run evaluation metrics diff --git a/scripts/download_resources.sh b/scripts/download_resources.sh index 9bf81a885..0fe90771e 100755 --- a/scripts/download_resources.sh +++ b/scripts/download_resources.sh @@ -4,4 +4,4 @@ set -e # common/scripts/sync_resources -aws s3 sync s3://openproblems-data/resources/grn/grn_benchmark resources/grn_benchmark --delete --no-sign-request \ No newline at end of file +aws s3 sync s3://openproblems-data/resources/grn/grn_benchmark resources/grn_benchmark --delete --no-sign-request diff --git a/scripts/run_benchmark_all.sh b/scripts/run_benchmark_all.sh index 01b96b479..4bab0fff5 100644 --- a/scripts/run_benchmark_all.sh +++ b/scripts/run_benchmark_all.sh @@ -22,8 +22,8 @@ param_list: metric_ids: $metric_ids method_ids: $method_ids evaluation_data: ${resources_dir}/evaluation_data/${dataset}.h5ad - rna: ${resources_dir}/inference_datasets/${dataset}_rna.h5ad - atac: ${resources_dir}/inference_datasets/${dataset}_atac.h5ad + rna: ${resources_dir}/inference_data/${dataset}_rna.h5ad + atac: ${resources_dir}/inference_data/${dataset}_atac.h5ad reg_type: $reg_type subsample: $subsample num_workers: $num_workers diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh index 3032d7a58..ed3d918f3 100644 --- a/scripts/run_grn_evaluation.sh +++ b/scripts/run_grn_evaluation.sh @@ -57,15 +57,16 @@ append_entry() { cat >> $param_file << HERE - id: ${reg_type}_${1} metric_ids: ${metric_ids} - evaluation_data: ${resources_dir}/evaluation_data/${dataset}.h5ad - evaluation_data_sc: ${resources_dir}/evaluation_data/${dataset}_sc_counts.h5ad + evaluation_data: ${resources_dir}/grn_benchmark/evaluation_data/${dataset}.h5ad + evaluation_data_sc: ${resources_dir}/grn_benchmark/evaluation_data/${dataset}_sc.h5ad 
reg_type: $reg_type method_id: $1 + dataset_id: $dataset num_workers: $num_workers - tf_all: ${resources_dir}/prior/tf_all.csv - regulators_consensus: ${resources_dir}/prior/regulators_consensus_${dataset}.json - ws_consensus: ${resources_dir}/prior/ws_consensus_${dataset}.json - ws_distance_background: ${resources_dir}/prior/ws_distance_background_${dataset}.json + tf_all: ${resources_dir}/grn_benchmark/prior/tf_all.csv + regulators_consensus: ${resources_dir}/grn_benchmark/prior/regulators_consensus_${dataset}.json + ws_consensus: ${resources_dir}/grn_benchmark/prior/ws_consensus_${dataset}.csv + ws_distance_background: ${resources_dir}/grn_benchmark/prior/ws_distance_background_${dataset}.csv prediction: ${grn_models_folder}/${dataset}/$1.h5ad layer: "X_norm" HERE diff --git a/src/api/file_atac_h5ad.yaml b/src/api/file_atac_h5ad.yaml index e0a4aefd0..21f34423c 100644 --- a/src/api/file_atac_h5ad.yaml +++ b/src/api/file_atac_h5ad.yaml @@ -1,5 +1,5 @@ type: file -example: resources_test/grn_benchmark/inference_datasets//op_atac.h5ad +example: resources_test/grn_benchmark/inference_data//op_atac.h5ad label: chromatin accessibility data summary: "Chromatin accessibility data" info: diff --git a/src/api/file_rna_h5ad.yaml b/src/api/file_rna_h5ad.yaml index 74e981370..9be9167ac 100644 --- a/src/api/file_rna_h5ad.yaml +++ b/src/api/file_rna_h5ad.yaml @@ -1,5 +1,5 @@ type: file -example: resources_test/grn_benchmark/inference_datasets/op_rna.h5ad +example: resources_test/grn_benchmark/inference_data/op_rna.h5ad label: gene expression data summary: "RNA expression data." 
info: diff --git a/src/helper.py b/src/helper.py index 6dc454835..7056439c4 100644 --- a/src/helper.py +++ b/src/helper.py @@ -40,7 +40,7 @@ def analyse_meta_cells(task_grn_inference_dir): par = { - 'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad', + 'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_data/{dataset}_rna.h5ad', "evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_data//{dataset}.h5ad", 'layer': 'X_norm', @@ -123,7 +123,7 @@ def analyse_imputation(task_grn_inference_dir): par = { - 'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad', + 'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_data/{dataset}_rna.h5ad', "evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_data//{dataset}.h5ad", 'layer': 'X_norm', @@ -204,7 +204,7 @@ def analyse_imputation(task_grn_inference_dir): def analyse_corr_vs_tfmasked_corr(task_grn_inference_dir): for i_run, dataset in enumerate(['op', 'replogle', 'nakatake', 'norman', 'adamson']): par = { - 'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad', + 'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_data/{dataset}_rna.h5ad', "evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_data//{dataset}.h5ad", 'layer': 'X_norm', diff --git a/src/methods/multi_omics/celloracle/script.py b/src/methods/multi_omics/celloracle/script.py index 0ca2676b8..6b0ccdbf3 100644 --- a/src/methods/multi_omics/celloracle/script.py +++ b/src/methods/multi_omics/celloracle/script.py @@ -6,8 +6,8 @@ ## VIASH START par = { - "rna": "resources/grn_benchmark/inference_datasets/op_rna.h5ad", - "atac": "resources/grn_benchmark/inference_datasets/op_atac.h5ad", + "rna": "resources/grn_benchmark/inference_data/op_rna.h5ad", + "atac": "resources/grn_benchmark/inference_data/op_atac.h5ad", 
"base_grn": 'output/celloracle/base_grn.csv', "temp_dir": 'output/celloracle/', "num_workers": 10, diff --git a/src/methods/single_omics/scgpt/run.sh b/src/methods/single_omics/scgpt/run.sh index 7b409ad1f..b833dd98c 100644 --- a/src/methods/single_omics/scgpt/run.sh +++ b/src/methods/single_omics/scgpt/run.sh @@ -1,4 +1,4 @@ viash run src/methods/single_omics/scgpt/config.vsh.yaml -- \ - --rna resources_test/grn_benchmark/inference_datasets//op_rna.h5ad \ + --rna resources_test/grn_benchmark/inference_data//op_rna.h5ad \ --tf_all resources/grn_benchmark/prior/tf_all.csv \ --prediction output/prediction.h5ad \ No newline at end of file diff --git a/src/methods/single_omics/scprint/run.sh b/src/methods/single_omics/scprint/run.sh index e8988169d..31c97b8a1 100644 --- a/src/methods/single_omics/scprint/run.sh +++ b/src/methods/single_omics/scprint/run.sh @@ -13,13 +13,13 @@ # viash run src/methods/single_omics/scprint/config.vsh.yaml -- \ -# --rna resources_test/grn_benchmark/inference_datasets//op_rna.h5ad \ +# --rna resources_test/grn_benchmark/inference_data//op_rna.h5ad \ # --tf_all resources/grn_benchmark/prior/tf_all.csv \ # --prediction output/prediction.h5ad # python src/methods/single_omics/scprint/script.py \ -# --rna resources/grn_benchmark/inference_datasets/op_rna.h5ad \ +# --rna resources/grn_benchmark/inference_data/op_rna.h5ad \ # --tf_all resources/grn_benchmark/prior/tf_all.csv \ # --prediction output/prediction.h5ad diff --git a/src/methods/single_omics/scprint/script.py b/src/methods/single_omics/scprint/script.py index 6ff098cf1..4b791612e 100644 --- a/src/methods/single_omics/scprint/script.py +++ b/src/methods/single_omics/scprint/script.py @@ -28,7 +28,7 @@ ## VIASH START par = { - 'rna': 'resources/grn_benchmark/inference_datasets/op_rna.h5ad', + 'rna': 'resources/grn_benchmark/inference_data/op_rna.h5ad', 'tf_all': 'resources/grn_benchmark/prior/tf_all.csv', 'prediction': 'output/grn.h5ad', 'filtration': 'top-k', diff --git 
a/src/process_data/explanatory_analysis/peak_annotation/script.R b/src/process_data/explanatory_analysis/peak_annotation/script.R index e3cda8e0f..bed2e4dac 100644 --- a/src/process_data/explanatory_analysis/peak_annotation/script.R +++ b/src/process_data/explanatory_analysis/peak_annotation/script.R @@ -11,7 +11,7 @@ library(tibble) ## VIASH START par <- list( - multiomics_atac = "resources/grn_benchmark/inference_datasets/op_atac.h5ad", + multiomics_atac = "resources/grn_benchmark/inference_data/op_atac.h5ad", annot_peak_database = "resources/grn_benchmark/prior/peak_annotation.csv" ) ## VIASH END diff --git a/src/process_data/op_multiomics/format_data/script.py b/src/process_data/op_multiomics/format_data/script.py index 7031a712d..a644f5705 100644 --- a/src/process_data/op_multiomics/format_data/script.py +++ b/src/process_data/op_multiomics/format_data/script.py @@ -4,8 +4,8 @@ ## VIASH START par = { 'multiome_counts': 'resources/datasets_raw/op_multiome_sc_counts.h5ad', - 'multiomics_rna': 'resources/grn_benchmark/inference_datasets/op_rna.h5ad', - 'multiomics_atac': 'resources/grn_benchmark/inference_datasets/op_atac.h5ad' + 'multiomics_rna': 'resources/grn_benchmark/inference_data/op_rna.h5ad', + 'multiomics_atac': 'resources/grn_benchmark/inference_data/op_atac.h5ad' } ## VIASH END diff --git a/src/process_data/pereggrn/script.py b/src/process_data/pereggrn/script.py index bb2c109ce..740cea933 100644 --- a/src/process_data/pereggrn/script.py +++ b/src/process_data/pereggrn/script.py @@ -105,7 +105,7 @@ def process_dataset(file_name): adata.write(f'resources/grn_benchmark/evaluation_data/{file_name}_sc.h5ad') adata_bulked.write(f'resources/extended_data/{file_name}_bulk.h5ad') - adata_train.write(f'resources/grn_benchmark/inference_datasets/{file_name}_rna.h5ad') + adata_train.write(f'resources/grn_benchmark/inference_data/{file_name}_rna.h5ad') adata_test.write(f'resources/grn_benchmark/evaluation_data/{file_name}_bulk.h5ad') diff --git 
a/src/process_data/replogle_k562_gwps/run.sh b/src/process_data/replogle_k562_gwps/run.sh index 4364003a3..8790f731f 100644 --- a/src/process_data/replogle_k562_gwps/run.sh +++ b/src/process_data/replogle_k562_gwps/run.sh @@ -18,5 +18,5 @@ python src/process_data/replogle_k562_gwps/script.py \ --adata_bulk resources/extended_data/replogle_bulk.h5ad \ --adata_test_sc resources/grn_benchmark/evaluation_data/replogle_sc.h5ad \ --adata_test_bulk resources/grn_benchmark/evaluation_data/replogle_bulk.h5ad \ - --adata_train_bulk resources/grn_benchmark/inference_datasets/replogle_rna.h5ad \ + --adata_train_bulk resources/grn_benchmark/inference_data/replogle_rna.h5ad \ --adata_train_sc resources/extended_data/replogle_train_sc.h5ad \ \ No newline at end of file diff --git a/src/process_data/test_data/run.sh b/src/process_data/test_data/run.sh index 0e0af2bb9..918b276df 100644 --- a/src/process_data/test_data/run.sh +++ b/src/process_data/test_data/run.sh @@ -1,6 +1,6 @@ viash run src/process_data/test_data/config.novsh.yaml -- \ - --rna resources/grn_benchmark/inference_datasets/op_rna.h5ad --rna_test resources_test/grn_benchmark/inference_datasets//op_rna.h5ad \ - --atac resources/grn_benchmark/inference_datasets/op_atac.h5ad --atac_test resources_test/grn_benchmark/inference_datasets//op_atac.h5ad \ + --rna resources/grn_benchmark/inference_data/op_rna.h5ad --rna_test resources_test/grn_benchmark/inference_data//op_rna.h5ad \ + --atac resources/grn_benchmark/inference_data/op_atac.h5ad --atac_test resources_test/grn_benchmark/inference_data//op_atac.h5ad \ --perturbation_data resources/grn_benchmark/evaluation_data/op.h5ad --perturbation_data_test resources_test/grn_benchmark/evaluation_data/op.h5ad \ --multiomics_counts resources/grn_benchmark/datasets_raw/op_multiome_sc_counts.h5ad --multiomics_counts_test resources_test/grn_benchmark/datasets_raw/op_multiome_sc_counts.h5ad \ # --perturbation_counts resources/datasets_raw/op_perturbation_sc_counts.h5ad 
--perturbation_counts_test resources_test/datasets_raw/op_perturbation_sc_counts.h5ad \ diff --git a/src/process_data/test_data/script.py b/src/process_data/test_data/script.py index 6588565bd..83b915a97 100644 --- a/src/process_data/test_data/script.py +++ b/src/process_data/test_data/script.py @@ -12,11 +12,11 @@ ## VIASH START par = { - 'rna': 'resources/grn_benchmark/inference_datasets/op_rna.h5ad', - 'rna_test': 'resources_test/grn_benchmark/inference_datasets//op_rna.h5ad', + 'rna': 'resources/grn_benchmark/inference_data/op_rna.h5ad', + 'rna_test': 'resources_test/grn_benchmark/inference_data//op_rna.h5ad', - 'atac': 'resources/grn_benchmark/inference_datasets/op_atac.h5ad', - 'atac_test': 'resources_test/grn_benchmark/inference_datasets//op_atac.h5ad', + 'atac': 'resources/grn_benchmark/inference_data/op_atac.h5ad', + 'atac_test': 'resources_test/grn_benchmark/inference_data//op_atac.h5ad', 'perturbation_data': 'resources/grn_benchmark/evaluation_data//op.h5ad', 'perturbation_data_test': 'resources_test/grn_benchmark/evaluation_data//op.h5ad', diff --git a/src/workflows/run_grn_evaluation/config.vsh.yaml b/src/workflows/run_grn_evaluation/config.vsh.yaml index fca0a232b..a504673ed 100644 --- a/src/workflows/run_grn_evaluation/config.vsh.yaml +++ b/src/workflows/run_grn_evaluation/config.vsh.yaml @@ -25,6 +25,10 @@ argument_groups: type: string required: true direction: input + - name: --dataset_id + type: string + required: true + direction: input - name: --tf_all type: file direction: input diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf index 164a9bb3c..ac3551587 100644 --- a/src/workflows/run_grn_evaluation/main.nf +++ b/src/workflows/run_grn_evaluation/main.nf @@ -39,11 +39,13 @@ workflow run_wf { // use 'fromState' to fetch the arguments the component requires from the overall state fromState: [ evaluation_data: "evaluation_data", + evaluation_data_sc: "evaluation_data_sc", prediction: "prediction", 
ws_distance_background: "ws_distance_background", subsample: "subsample", reg_type: "reg_type", method_id: "method_id", + dataset_id: "dataset_id", num_workers: "num_workers", regulators_consensus: "regulators_consensus", ws_consensus: "ws_consensus", diff --git a/src/workflows_local/benchmark/methods/script.py b/src/workflows_local/benchmark/methods/script.py index 9c32eb2ff..eb630ba9b 100644 --- a/src/workflows_local/benchmark/methods/script.py +++ b/src/workflows_local/benchmark/methods/script.py @@ -20,8 +20,8 @@ def run_grn_inference(par, dataset='op', subsample=None): par_local = { 'models_dir': f'resources/grn_models/{dataset}/', - 'rna': f'resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad', - 'atac': f'resources/grn_benchmark/inference_datasets/{dataset}_atac.h5ad', + 'rna': f'resources/grn_benchmark/inference_data/{dataset}_rna.h5ad', + 'atac': f'resources/grn_benchmark/inference_data/{dataset}_atac.h5ad', 'rna_positive_control': f'resources/datasets_raw/{dataset}.h5ad', 'num_workers': 10, 'tmp_dir': 'output/grn_inference' diff --git a/test.ipynb b/test.ipynb index 63c2aa963..af308d81a 100644 --- a/test.ipynb +++ b/test.ipynb @@ -50,7 +50,7 @@ } ], "source": [ - "adata = ad.read_h5ad('resources/grn_benchmark/inference_datasets/replogle_rna.h5ad')\n", + "adata = ad.read_h5ad('resources/grn_benchmark/inference_data/replogle_rna.h5ad')\n", "adata[adata.obs['is_control']]" ] }