
Commit

local workflow debugged
janursa committed Feb 6, 2025
1 parent 16590d3 commit cb0cafc
Showing 37 changed files with 449 additions and 264 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -245,7 +245,7 @@ Data structure:

Perturbation dataset for benchmarking.

-Example file: `resources_test/grn_benchmark/evaluation_datasets//op_perturbation.h5ad`
+Example file: `resources_test/grn_benchmark/evaluation_data//op.h5ad`

Format:

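The README example path above is the quickest thing to verify after the rename. A minimal sanity check, assuming the renamed file is an ordinary AnnData `.h5ad` (anndata is already used throughout this repo):

```python
import anndata as ad

# Path copied from the updated README line above.
adata = ad.read_h5ad("resources_test/grn_benchmark/evaluation_data/op.h5ad")
print(adata)  # summary: n_obs x n_vars plus the obs/layers/uns keys described under "Format"
```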
4 changes: 2 additions & 2 deletions _viash.yaml
@@ -32,8 +32,8 @@ info:
path: s3://openproblems-data/resources_test/grn/inference_datasets/
dest: resources_test/grn_benchmark/inference_datasets//
- type: s3
-path: s3://openproblems-data/resources_test/grn/evaluation_datasets/
-dest: resources_test/grn_benchmark/evaluation_datasets//
+path: s3://openproblems-data/resources_test/grn/evaluation_data/
+dest: resources_test/grn_benchmark/evaluation_data//
- type: s3
path: s3://openproblems-data/resources_test/grn/prior/
dest: resources_test/grn_benchmark/prior/
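These `type: s3` entries drive the test-resource download, and the same mirroring can be reproduced by hand. A sketch using the AWS CLI through `subprocess`, with bucket and destination taken verbatim from the updated entry above (assumes a configured AWS CLI):

```python
import subprocess

# Mirror the renamed evaluation_data test resources locally, as _viash.yaml declares.
subprocess.run(
    [
        "aws", "s3", "sync",
        "s3://openproblems-data/resources_test/grn/evaluation_data/",
        "resources_test/grn_benchmark/evaluation_data/",
    ],
    check=True,
)
```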
108 changes: 69 additions & 39 deletions runs.ipynb
@@ -14,11 +14,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"upload: resources/grn_benchmark/inference_datasets/op_rna.rds to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_rna.rds\n",
"upload: resources/grn_benchmark/inference_datasets/op_atac.rds to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_atac.rds\n",
"upload: resources/grn_benchmark/prior/op/annot_peak_database.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/annot_peak_database.csv\n",
"upload: resources/grn_benchmark/prior/op/cell_topic.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/cell_topic.csv\n",
"upload: resources/grn_benchmark/prior/op/peaks.bed to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peaks.bed\n",
"upload: resources/grn_benchmark/prior/op/peak_annotation.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peak_annotation.csv\n",
"upload: resources/grn_benchmark/prior/op/peaks.txt to s3://openproblems-data/resources/grn/grn_benchmark/prior/op/peaks.txt\n",
"upload: resources/grn_benchmark/prior/regulators_consensus_adamson.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_adamson.json\n",
"upload: resources/grn_benchmark/prior/regulators_consensus_nakatake.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_nakatake.json\n",
"upload: resources/grn_benchmark/prior/regulators_consensus_norman.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_norman.json\n",
"upload: resources/grn_benchmark/prior/regulators_consensus_op.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_op.json\n",
"upload: resources/grn_benchmark/prior/regulators_consensus_replogle2.json to s3://openproblems-data/resources/grn/grn_benchmark/prior/regulators_consensus_replogle2.json\n",
"upload: resources/grn_benchmark/prior/tf_all.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/tf_all.csv\n",
"upload: resources/grn_benchmark/inference_datasets/replogle_rna.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/replogle_rna.h5ad\n",
"upload: resources/grn_benchmark/prior/ws_consensus_adamson.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_consensus_adamson.csv\n",
"upload: resources/grn_benchmark/prior/ws_consensus_norman.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_consensus_norman.csv\n",
"upload: resources/grn_benchmark/prior/ws_distance_background_adamson.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_distance_background_adamson.csv\n",
"upload: resources/grn_benchmark/prior/ws_distance_background_norman.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/ws_distance_background_norman.csv\n",
"upload: resources/grn_benchmark/prior/skeleton.csv to s3://openproblems-data/resources/grn/grn_benchmark/prior/skeleton.csv\n",
"upload: resources/grn_benchmark/inference_datasets/op_rna.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_rna.h5ad\n",
"upload: resources/grn_benchmark/inference_datasets/op_atac.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/inference_datasets/op_atac.h5ad\n",
"upload: resources/grn_benchmark/evaluation_datasets/replogle_sc.h5ad to s3://openproblems-data/resources/grn/grn_benchmark/evaluation_datasets/replogle_sc.h5ad\n"
]
}
],
"source": [
"!aws s3 sync s3://openproblems-data/resources_test/grn/ resources_test/ --delete"
"!aws s3 sync resources/grn_benchmark s3://openproblems-data/resources/grn/grn_benchmark --delete"
]
},
{
@@ -32,7 +61,7 @@
"# !aws s3 sync resources/grn_benchmark/prior/ s3://openproblems-data/resources/grn/prior --delete\n",
"# !aws s3 sync resources/results/ s3://openproblems-data/resources/grn/results --delete\n",
"# !aws s3 sync resources/scores/ s3://openproblems-data/resources/grn/scores --delete\n",
"# !aws s3 sync resources/grn_benchmark/evaluation_datasets// s3://openproblems-data/resources/grn/evaluation_datasets/ --delete\n",
"# !aws s3 sync resources/grn_benchmark/evaluation_data// s3://openproblems-data/resources/grn/evaluation_data/ --delete\n",
"# !aws s3 sync resources/grn_benchmark/inference_datasets/ s3://openproblems-data/resources/grn/inference_datasets/ --delete"
]
},
@@ -171,7 +200,7 @@
"# aws s3 sync s3://openproblems-data/resources/grn/results resources/results/ --delete\n",
"# aws s3 sync s3://openproblems-data/resources/grn/grn_models resources/grn_models/\n",
"# aws s3 sync s3://openproblems-data/resources/grn/inference_datasets/ resources/grn_benchmark/inference_datasets/\n",
"# aws s3 sync s3://openproblems-data/resources/grn/evaluation_datasets/ resources/grn_benchmark/evaluation_datasets//"
"# aws s3 sync s3://openproblems-data/resources/grn/evaluation_data/ resources/grn_benchmark/evaluation_data//"
]
},
{
@@ -193,7 +222,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -228,26 +257,14 @@
" 'scores_dir': 'resources/scores'\n",
"}\n",
"\n",
"datasets = ['op', 'replogle2', 'nakatake', 'norman', 'adamson']"
"datasets = ['op', 'replogle', 'nakatake', 'norman', 'adamson']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'datasets' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m dataset \u001b[38;5;129;01min\u001b[39;00m \u001b[43mdatasets\u001b[49m:\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m method \u001b[38;5;129;01min\u001b[39;00m par[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmethods\u001b[39m\u001b[38;5;124m'\u001b[39m]:\n\u001b[1;32m 3\u001b[0m file_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mresources/grn_models/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdataset\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmethod\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.csv\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
"\u001b[0;31mNameError\u001b[0m: name 'datasets' is not defined"
]
}
],
"outputs": [],
"source": [
"for dataset in datasets:\n",
" for method in par['methods']:\n",
@@ -261,33 +278,46 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jnourisa/miniconda3/envs/py10/lib/python3.10/site-packages/anndata/_core/aligned_df.py:68: ImplicitModificationWarning: Transforming to str index.\n",
" warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n",
"/home/jnourisa/miniconda3/envs/py10/lib/python3.10/site-packages/anndata/_core/aligned_df.py:68: ImplicitModificationWarning: Transforming to str index.\n",
" warnings.warn(\"Transforming to str index.\", ImplicitModificationWarning)\n"
]
}
],
"source": [
"import anndata as ad\n",
"adata = ad.read_h5ad('K562_gwps_raw_singlecell.h5ad', backed='r')"
"adata = ad.read_h5ad('output/score_op_scprint.h5ad')"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'adata' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43madata\u001b[49m\u001b[38;5;241m.\u001b[39mobs\n",
"\u001b[0;31mNameError\u001b[0m: name 'adata' is not defined"
]
"data": {
"text/plain": [
"{'dataset_id': 'op',\n",
" 'method_id': 'reg2-scprint',\n",
" 'metric_ids': array(['reg2-theta-0.0', 'reg2-theta-0.5', 'reg2-theta-1.0'], dtype=object),\n",
" 'metric_values': array([0.23660427, 0.28158203, 0.31389769])}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adata.obs"
"adata.uns"
]
},
{
@@ -1039,7 +1069,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### replogle2"
"### replogle"
]
},
{
@@ -1257,7 +1287,7 @@
}
],
"source": [
"df_scores = pd.read_csv(f\"resources/scores/replogle2/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"df_scores = pd.read_csv(f\"resources/scores/replogle/50000-skeleton_False-binarize_True-ridge.csv\", index_col=0)\n",
"# df_scores[df_scores<0] = 0\n",
"\n",
"df_scores_f = df_scores[['static-theta-0.0', 'static-theta-0.5', 'static-theta-1.0']]\n",
@@ -3801,7 +3831,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "py10",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -3815,7 +3845,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
"version": "3.10.15"
}
},
"nbformat": 4,
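The `adata.uns` payload printed in the notebook above is the per-run score record. A short sketch that flattens it into a table for comparison across runs, assuming the structure shown (`dataset_id`, `method_id`, `metric_ids`, `metric_values`):

```python
import anndata as ad
import pandas as pd

adata = ad.read_h5ad("output/score_op_scprint.h5ad")
u = adata.uns

# One row per metric, tagged with dataset and method so runs can be concatenated.
scores = pd.DataFrame({"metric": list(u["metric_ids"]), "value": list(u["metric_values"])})
scores["dataset"] = u["dataset_id"]
scores["method"] = u["method_id"]
print(scores)
```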
2 changes: 1 addition & 1 deletion scripts/run_benchmark_all.sh
@@ -21,7 +21,7 @@ param_list:
- id: ${reg_type}
metric_ids: $metric_ids
method_ids: $method_ids
-evaluation_data: ${resources_dir}/evaluation_datasets/${dataset}_perturbation.h5ad
+evaluation_data: ${resources_dir}/evaluation_data/${dataset}.h5ad
rna: ${resources_dir}/inference_datasets/${dataset}_rna.h5ad
atac: ${resources_dir}/inference_datasets/${dataset}_atac.h5ad
reg_type: $reg_type
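Every generated param entry now points at `evaluation_data/${dataset}.h5ad`, so a pre-flight existence check catches a missed rename before a benchmark run is launched. A hypothetical helper (dataset list as in `runs.ipynb` above):

```python
from pathlib import Path

datasets = ["op", "replogle", "nakatake", "norman", "adamson"]
resources_dir = Path("resources/grn_benchmark")  # assumed local resources root

# Report any dataset whose renamed evaluation file is missing.
for dataset in datasets:
    f = resources_dir / "evaluation_data" / f"{dataset}.h5ad"
    print(f, "ok" if f.exists() else "MISSING")
```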
4 changes: 2 additions & 2 deletions scripts/run_grn_evaluation.sh
@@ -57,8 +57,8 @@ append_entry() {
cat >> $param_file << HERE
- id: ${reg_type}_${1}
metric_ids: ${metric_ids}
-evaluation_data: ${resources_dir}/evaluation_datasets/${dataset}_perturbation.h5ad
-evaluation_data_sc: ${resources_dir}/evaluation_datasets/${dataset}_sc_counts.h5ad
+evaluation_data: ${resources_dir}/evaluation_data/${dataset}.h5ad
+evaluation_data_sc: ${resources_dir}/evaluation_data/${dataset}_sc_counts.h5ad
reg_type: $reg_type
method_id: $1
num_workers: $num_workers
4 changes: 2 additions & 2 deletions scripts/single_grn_evaluation.sh
@@ -11,5 +11,5 @@ viash run src/metrics/all_metrics/config.novsh.yaml -- \
--regulators_consensus resources/grn_benchmark/prior/regulators_consensus_${dataset_id}.json \
--ws_consensus resources/grn_benchmark/prior/ws_consensus_${dataset_id}.csv \
--ws_distance_background resources/grn_benchmark/prior/ws_distance_background_${dataset_id}.csv \
---evaluation_data_sc resources/grn_benchmark/evaluation_datasets//${dataset_id}_sc_counts.h5ad \
---evaluation_data resources/grn_benchmark/evaluation_datasets//${dataset_id}_perturbation.h5ad
+--evaluation_data_sc resources/grn_benchmark/evaluation_data//${dataset_id}_sc_counts.h5ad \
+--evaluation_data resources/grn_benchmark/evaluation_data//${dataset_id}.h5ad
2 changes: 1 addition & 1 deletion src/api/file_evaluation_h5ad.yaml
@@ -1,5 +1,5 @@
type: file
-example: resources_test/grn_benchmark/evaluation_datasets/op_bulk.h5ad
+example: resources_test/grn_benchmark/evaluation_data/op_bulk.h5ad
label: perturbation data
summary: "Perturbation dataset for benchmarking."

2 changes: 1 addition & 1 deletion src/control_methods/pearson_corr/script.py
@@ -10,7 +10,7 @@

## VIASH START
par = {
-'rna': 'resources/grn_benchmark/evaluation_datasets//op_rna.h5ad',
+'rna': 'resources/grn_benchmark/evaluation_data//op_rna.h5ad',
'tf_all': 'resources/grn_benchmark/prior/tf_all.csv',
'cell_type_specific': False,
'max_n_links': 50000,
8 changes: 4 additions & 4 deletions src/helper.py
@@ -41,7 +41,7 @@ def analyse_meta_cells(task_grn_inference_dir):

par = {
'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad',
"evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_datasets//{dataset}_perturbation.h5ad",
"evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_data//{dataset}.h5ad",

'layer': 'X_norm',
'consensus': f'{task_grn_inference_dir}/resources/grn_benchmark/prior/{dataset}_consensus-num-regulators.json',
@@ -124,7 +124,7 @@ def analyse_imputation(task_grn_inference_dir):

par = {
'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad',
"evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_datasets//{dataset}_perturbation.h5ad",
"evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_data//{dataset}.h5ad",

'layer': 'X_norm',
'consensus': f'{task_grn_inference_dir}/resources/grn_benchmark/prior/{dataset}_consensus-num-regulators.json',
@@ -202,10 +202,10 @@ def analyse_imputation(task_grn_inference_dir):
scores_all.to_csv(f"{par['temp_dir']}/scores_all.csv")

def analyse_corr_vs_tfmasked_corr(task_grn_inference_dir):
-for i_run, dataset in enumerate(['op', 'replogle2', 'nakatake', 'norman', 'adamson']):
+for i_run, dataset in enumerate(['op', 'replogle', 'nakatake', 'norman', 'adamson']):
par = {
'rna': f'{task_grn_inference_dir}/resources/grn_benchmark/inference_datasets/{dataset}_rna.h5ad',
"evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_datasets//{dataset}_perturbation.h5ad",
"evaluation_data": f"{task_grn_inference_dir}/resources/grn_benchmark/evaluation_data//{dataset}.h5ad",

'layer': 'X_norm',
'consensus': f'{task_grn_inference_dir}/resources/grn_benchmark/prior/{dataset}_consensus-num-regulators.json',
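The same `par` path layout recurs in all three analysis functions in `helper.py`. A sketch of a shared constructor that would keep the renamed paths in one place (hypothetical refactor, not part of this commit):

```python
def make_par(task_grn_inference_dir: str, dataset: str) -> dict:
    # Path layout mirrors the renamed scheme used throughout helper.py.
    root = f"{task_grn_inference_dir}/resources/grn_benchmark"
    return {
        "rna": f"{root}/inference_datasets/{dataset}_rna.h5ad",
        "evaluation_data": f"{root}/evaluation_data/{dataset}.h5ad",
        "layer": "X_norm",
        "consensus": f"{root}/prior/{dataset}_consensus-num-regulators.json",
    }
```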
2 changes: 1 addition & 1 deletion src/metrics/all_metrics/helper.py
@@ -37,7 +37,7 @@ def run_consensus(datasets):
for dataset in datasets:
par = {
'models': models,
-'evaluation_data': f'resources/grn_benchmark/evaluation_datasets//{dataset}_perturbation.h5ad',
+'evaluation_data': f'resources/grn_benchmark/evaluation_data//{dataset}.h5ad',
'evaluation_data_sc': f'resources/datasets_raw/{dataset}_sc_counts.h5ad',
'models_dir': f'resources/grn_models/{dataset}/',
'regulators_consensus': f'resources/grn_benchmark/prior/regulators_consensus_{dataset}.json',
14 changes: 14 additions & 0 deletions src/metrics/all_metrics/run.sh
@@ -0,0 +1,14 @@


python src/metrics/all_metrics/script.py \
--run_local \
--prediction resources/grn_models/op/scprint.csv \
# --dataset_id op \
# --method_id scprint \
# --evaluation_data resources/grn_benchmark/evaluation_data/op_bulk.h5ad \
# --evaluation_data_sc resources/grn_benchmark/evaluation_data/op_sc.h5ad \
# --regulators_consensus resources/grn_benchmark/prior/regulators_consensus_op.json \
# --ws_consensus resources/grn_benchmark/prior/ws_consensus_op.json \
# --ws_distance_background resources/grn_benchmark/prior/ws_distance_background_op.json \
# --score output/score_op_scprint.h5ad \

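Since every flag after `--prediction` is commented out here (matching the argparse flags commented out in `script.py` below), only `--run_local` and `--prediction` are live; note the trailing backslash before the first comment line works but is fragile. The effective call is equivalent to:

```python
import subprocess

# What run.sh executes as committed: only the two live flags reach script.py.
subprocess.run(
    [
        "python", "src/metrics/all_metrics/script.py",
        "--run_local",
        "--prediction", "resources/grn_models/op/scprint.csv",
    ],
    check=True,
)
```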
18 changes: 9 additions & 9 deletions src/metrics/all_metrics/script.py
@@ -9,14 +9,14 @@
argparser = argparse.ArgumentParser()
argparser.add_argument('--run_local', action='store_true', help='Run locally')
argparser.add_argument('--prediction', help='Path to the GRN prediction file')
-argparser.add_argument('--evaluation_dataset')
-argparser.add_argument('--evaluation_dataset_sc')
-argparser.add_argument('--regulators_consensus')
-argparser.add_argument('--ws_consensus')
-argparser.add_argument('--ws_distance_background')
-argparser.add_argument('--method_id', help='Method ID')
-argparser.add_argument('--dataset_id', help='Dataset ID')
-argparser.add_argument('--score', help='Where to store scores')
+# argparser.add_argument('--evaluation_data')
+# argparser.add_argument('--evaluation_data_sc')
+# argparser.add_argument('--regulators_consensus')
+# argparser.add_argument('--ws_consensus')
+# argparser.add_argument('--ws_distance_background')
+# argparser.add_argument('--method_id', help='Method ID')
+# argparser.add_argument('--dataset_id', help='Dataset ID')
+# argparser.add_argument('--score', help='Where to store scores')

par_local = vars(argparser.parse_args())

@@ -78,7 +78,7 @@ def main(par):
assert par['dataset_id']
dataset = par['dataset_id']

-# par['evaluation_data'] = f'resources/grn_benchmark/evaluation_datasets/{dataset}_perturbation.h5ad'
+# par['evaluation_data'] = f'resources/grn_benchmark/evaluation_data/{dataset}.h5ad'
# par['evaluation_data_sc'] = f'resources/datasets_raw/{dataset}_sc_counts.h5ad'
# par['regulators_consensus'] = f'resources/grn_benchmark/prior/regulators_consensus_{dataset}.json'
# par['ws_consensus'] = f'resources/grn_benchmark/prior/ws_consensus_{dataset}.csv'
6 changes: 3 additions & 3 deletions src/metrics/all_metrics/script_all.py
@@ -62,7 +62,7 @@ def run_evaluation(dataset, models, models_dir, save_file_name):
run_scores_flag = True
run_consensus_flag = False
run_ws_distance_background_flag = False
-datasets = ['op', 'replogle2', 'nakatake', 'norman', 'adamson']
+datasets = ['op', 'replogle', 'nakatake', 'norman', 'adamson']

if run_consensus_flag: # run consensus
run_consensus(datasets)
@@ -88,7 +88,7 @@ def run_evaluation(dataset, models, models_dir, save_file_name):
run_evaluation(dataset, models, models_dir, scores_dir, save_file_name)

if True: # subsample
-# for dataset in ['op', 'replogle2', 'nakatake', 'norman', 'adamson']: #'op', 'replogle2', 'nakatake', 'norman', 'adamson'
+# for dataset in ['op', 'replogle', 'nakatake', 'norman', 'adamson']: #'op', 'replogle', 'nakatake', 'norman', 'adamson'
for dataset in ['op']:
if dataset == 'op':
models_subsampled = [f'{model}_{subsample}' for subsample in [1, 2] for model in models]
@@ -146,7 +146,7 @@ def run_evaluation(dataset, models, models_dir, save_file_name):
# def define_par(dataset):

# par = {
# "evaluation_data": f"resources/grn_benchmark/evaluation_datasets//{dataset}_perturbation.h5ad",
# "evaluation_data": f"resources/grn_benchmark/evaluation_data//{dataset}.h5ad",
# 'consensus': f'resources/grn_benchmark/prior/{dataset}_consensus-num-regulators.json',

# 'layer': 'X_norm',
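For the `op` subsampling branch above, the comprehension expands each model name with a subsample suffix; a quick illustration (model names here are placeholders):

```python
# Mirrors the subsampling branch in script_all.py for the 'op' dataset.
models = ["pearson_corr", "portia"]  # illustrative
models_subsampled = [f"{model}_{subsample}" for subsample in [1, 2] for model in models]
print(models_subsampled)  # ['pearson_corr_1', 'portia_1', 'pearson_corr_2', 'portia_2']
```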
