diff --git a/params/subsample_200_ridge.yaml b/params/subsample_200_ridge.yaml index da36ee140..ddb96e055 100644 --- a/params/subsample_200_ridge.yaml +++ b/params/subsample_200_ridge.yaml @@ -1,22 +1,321 @@ param_list: - id: pearson_celloracle - perturbation_data: resources/grn-benchmark/perturbation_data.h5ad + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad layer: pearson - prediction: resources/grn_models/celloracle.csv + prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv reg_type: ridge method_id: celloracle subsample: 200 max_workers: 20 - - id: lognorm_positive_control - perturbation_data: resources/grn-benchmark/perturbation_data.h5ad + - id: lognorm_celloracle + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv + reg_type: ridge + method_id: celloracle + subsample: 200 + max_workers: 20 + + - id: scgen_pearson_celloracle + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv + reg_type: ridge + method_id: celloracle + subsample: 200 + max_workers: 20 + + - id: scgen_lognorm_celloracle + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv + reg_type: ridge + method_id: celloracle + subsample: 200 + max_workers: 20 + + - id: seurat_pearson_celloracle + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv + reg_type: ridge + method_id: celloracle + subsample: 200 + max_workers: 20 + + - id: seurat_lognorm_celloracle + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv + reg_type: ridge + method_id: celloracle + subsample: 200 + max_workers: 20 + + - id: pearson_scenicplus + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: pearson + prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv + reg_type: ridge + method_id: scenicplus + subsample: 200 + max_workers: 20 + + - id: lognorm_scenicplus + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv + reg_type: ridge + method_id: scenicplus + subsample: 200 + max_workers: 20 + + - id: scgen_pearson_scenicplus + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv + reg_type: ridge + method_id: scenicplus + subsample: 200 + max_workers: 20 + + - id: scgen_lognorm_scenicplus + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv + reg_type: ridge + method_id: scenicplus + subsample: 200 + max_workers: 20 + + - id: seurat_pearson_scenicplus + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv + reg_type: ridge + method_id: scenicplus + subsample: 200 + max_workers: 20 + + - id: seurat_lognorm_scenicplus + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv + reg_type: ridge + method_id: scenicplus + subsample: 200 + max_workers: 20 + + - id: pearson_figr + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: pearson + prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv + reg_type: ridge + method_id: figr + subsample: 200 + max_workers: 20 + + - id: lognorm_figr + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv + reg_type: ridge + method_id: figr + subsample: 200 + max_workers: 20 + + - id: scgen_pearson_figr + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv + reg_type: ridge + method_id: figr + subsample: 200 + max_workers: 20 + + - id: scgen_lognorm_figr + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv + reg_type: ridge + method_id: figr + subsample: 200 + max_workers: 20 + + - id: seurat_pearson_figr + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv + reg_type: ridge + method_id: figr + subsample: 200 + max_workers: 20 + + - id: seurat_lognorm_figr + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv + reg_type: ridge + method_id: figr + subsample: 200 + max_workers: 20 + + - id: pearson_granie + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: pearson + prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv + reg_type: ridge + method_id: granie + subsample: 200 + max_workers: 20 + + - id: lognorm_granie + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv + reg_type: ridge + method_id: granie + subsample: 200 + max_workers: 20 + + - id: scgen_pearson_granie + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv + reg_type: ridge + method_id: granie + subsample: 200 + max_workers: 20 + + - id: scgen_lognorm_granie + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv + reg_type: ridge + method_id: granie + subsample: 200 + max_workers: 20 + + - id: seurat_pearson_granie + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv + reg_type: ridge + method_id: granie + subsample: 200 + max_workers: 20 + + - id: seurat_lognorm_granie + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv + reg_type: ridge + method_id: granie + subsample: 200 + max_workers: 20 + + - id: pearson_scglue + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: pearson + prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv + reg_type: ridge + method_id: scglue + subsample: 200 + max_workers: 20 + + - id: lognorm_scglue + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv + reg_type: ridge + method_id: scglue + subsample: 200 + max_workers: 20 + + - id: scgen_pearson_scglue + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv + reg_type: ridge + method_id: scglue + subsample: 200 + max_workers: 20 + + - id: scgen_lognorm_scglue + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv + reg_type: ridge + method_id: scglue + subsample: 200 + max_workers: 20 + + - id: seurat_pearson_scglue + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_pearson + prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv + reg_type: ridge + method_id: scglue + subsample: 200 + max_workers: 20 + + - id: seurat_lognorm_scglue + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_lognorm + prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv + reg_type: ridge + method_id: scglue + subsample: 200 + max_workers: 20 + + - id: pearson_positive_control + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad layer: pearson reg_type: ridge method_id: positive_control - tf_all: resources/prior/tf_all.csv subsample: 200 max_workers: 20 + - id: lognorm_positive_control + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: lognorm + reg_type: ridge + method_id: positive_control + subsample: 200 + max_workers: 20 + + - id: scgen_pearson_positive_control + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_pearson + reg_type: ridge + method_id: positive_control + subsample: 200 + max_workers: 20 + + - id: scgen_lognorm_positive_control + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: scgen_lognorm + reg_type: ridge + method_id: positive_control + subsample: 200 + max_workers: 20 + + - id: seurat_pearson_positive_control + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_pearson + reg_type: ridge + method_id: positive_control + subsample: 200 + max_workers: 20 + + - id: seurat_lognorm_positive_control + perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad + layer: seurat_lognorm + reg_type: ridge + method_id: positive_control + subsample: 200 + max_workers: 20 output_state: "state.yaml" publish_dir: "s3://openproblems-data/resources/grn/results/subsample_200_ridge" diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/run_grn_evaluation_tw.sh index 6defe7352..13d78ce2b 100644 --- a/scripts/run_grn_evaluation_tw.sh +++ b/scripts/run_grn_evaluation_tw.sh @@ -66,7 +66,8 @@ grn_name="positive_control" for layer in "${layers[@]}"; do cat >> $param_file << HERE - id: ${layer}_${grn_name} - perturbation_data: ${perturbation_data} + perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad + tf_all: ${resources_dir}/prior/tf_all.csv layer: ${layer} reg_type: $reg_type method_id: $grn_name diff --git a/scripts/run_grn_inference.sh b/scripts/run_grn_inference.sh index cd548d393..7d04c88ab 100644 --- a/scripts/run_grn_inference.sh +++ b/scripts/run_grn_inference.sh @@ -28,7 +28,7 @@ HERE if [ "$submit" = true ]; then nextflow run . \ - -main-script target/nextflow/workflows/run_grn_inference/main.nf \ + -main-script target/nextflow/workflows/grn_inference_celloracle/main.nf \ -profile docker \ -with-trace \ -c src/common/nextflow_helpers/labels_ci.config \ diff --git a/src/methods/multi_omics/celloracle/config.vsh.yaml b/src/methods/multi_omics/celloracle/config.vsh.yaml index 92219e2a8..21197f9dd 100644 --- a/src/methods/multi_omics/celloracle/config.vsh.yaml +++ b/src/methods/multi_omics/celloracle/config.vsh.yaml @@ -8,14 +8,20 @@ functionality: summary: "GRN inference using celloracle" description: | GRN inference using celloracle. - documentation_url: https://morris-lab.github.io/CellOracle.documentation/ - + documentation_url: https://morris-lab.github.io/CellOracle.documentation/ + arguments: + - name: --base_grn + type: file + direction: output + default: output/celloracle/base_grn.csv + - name: --links + type: file + direction: output + default: output/celloracle/links.celloracle.links resources: - type: python_script path: script.py - path: main.py - - platforms: - type: docker image: kenjikamimoto126/celloracle_ubuntu:0.18.0 diff --git a/src/methods/multi_omics/celloracle/main.py b/src/methods/multi_omics/celloracle/main.py index 6184486cb..c27be06b1 100644 --- a/src/methods/multi_omics/celloracle/main.py +++ b/src/methods/multi_omics/celloracle/main.py @@ -50,7 +50,7 @@ def base_grn(par) -> None: df = tfi.to_dataframe() print("Base GRN is built") - df.to_csv(f"{par['temp_dir']}/grn_celloracle_base.csv") + df.to_csv(par['base_grn']) def preprocess_rna(par) -> None: print("Processing rna data") @@ -102,7 +102,7 @@ def preprocess_rna(par) -> None: def infer_grn(par): print("Inferring GRN using base GRN and rna expression") adata = ad.read_h5ad(f"{par['temp_dir']}/adata.h5ad") - base_GRN = pd.read_csv(f"{par['temp_dir']}/grn_celloracle_base.csv") + base_GRN = pd.read_csv(par['base_grn']) # Instantiate Oracle object oracle = co.Oracle() # Instantiate Oracle object. @@ -125,14 +125,15 @@ def infer_grn(par): b_maxl=k*4, n_jobs=par["num_workers"]) links = oracle.get_links(cluster_name_for_GRN_unit="cell_type", alpha=10, verbose_level=10, n_jobs=par["num_workers"]) - links.to_hdf5(file_path=f"{par['temp_dir']}/links.celloracle.links") + links.to_hdf5(file_path=par['links']) def refine_grns(par): print("Refining GRNs") - links_o = co.load_hdf5(f"{par['temp_dir']}/links.celloracle.links") + links_o = co.load_hdf5(par['links']) links_dict = links_o.links_dict.copy() grn_stack = [] tt = 0.05 for cell_type, grn in links_dict.items(): + print(f"{cell_type}, GRN before filter: {grn.shape}") mask = grn.p