Skip to content

Commit

Permalink
reg2 modified
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 12, 2024
1 parent 8f536d5 commit 0e2b73a
Show file tree
Hide file tree
Showing 51 changed files with 38 additions and 24 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions src/metrics/regression_1/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,6 @@ def set_global_seed(seed):
lightgbm.LGBMRegressor().set_params(random_state=seed)


def format_folder(work_dir, exclude_missing_genes, reg_type, theta, tf_n, norm_method, subsample=None):
    '''Build the path of the scores folder for one benchmark configuration.

    Every argument is interpolated into the path with its default string
    formatting; nothing is validated and nothing is created on disk.

    Args:
        work_dir: Path prefix, used verbatim.
        exclude_missing_genes: Rendered into the ``exclude_missing_genes_<value>`` component.
        reg_type: Used as its own path component.
        theta: Rendered into the ``theta_<value>`` part of the fourth component.
        tf_n: Rendered into the ``tf_n_<value>`` part of the fourth component.
        norm_method: Used as the final path component.
        subsample: Rendered into the ``subsample_<value>`` component. The
            default ``None`` therefore yields the literal ``subsample_None``.

    Returns:
        The '/'-separated folder path as a string.
    '''
    # Adjacent f-strings are concatenated at compile time; the result is the
    # same single path string, just split for readability.
    return (
        f'{work_dir}/benchmark/scores/subsample_{subsample}'
        f'/exclude_missing_genes_{exclude_missing_genes}/{reg_type}'
        f'/theta_{theta}_tf_n_{tf_n}/{norm_method}'
    )


def pivot_grn(net):
''' make net to have gene*tf format'''
df_tmp = net.pivot(index='target', columns='source', values='weight')
Expand Down
7 changes: 0 additions & 7 deletions src/metrics/regression_1/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@

metric_ids = output.columns.to_numpy()
metric_values = output.values[0]
# if metric_ids.ndim == 1:
# metric_ids = metric_ids.reshape(1, -1)
# if metric_values.ndim == 1:
# metric_values = metric_values.reshape(1, -1)

print(metric_ids.shape, metric_values.shape)
output = ad.AnnData(
Expand All @@ -45,7 +41,4 @@
}
)


# print(output.uns)

output.write_h5ad(par["score"], compression="gzip")
3 changes: 1 addition & 2 deletions src/metrics/regression_2/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,11 @@ functionality:
path: script.py
- path: main.py
arguments:
- name: --layer
type: string
- name: --consensus
type: file
direction: input
must_exist: true
default: 'resources/grn-benchmark/consensus-num-regulators.json'
platforms:
- type: docker
image: ghcr.io/openproblems-bio/base_python:1.0.4
Expand Down
5 changes: 4 additions & 1 deletion src/metrics/regression_2/consensus/create-consensus.sh
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus && bin/regression_2/consensus/consensus_for_regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --output resources/grn-benchmark/consensus-num-regulators.json --grn_folder resources/grn-benchmark/grn_models/ --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv
viash run src/metrics/regression_2/consensus/config.novsh.yaml -- --perturbation_data resources/grn-benchmark/perturbation_data.h5ad \
--output resources/grn-benchmark/consensus-num-regulators.json \
--grn_folder resources/grn_models/ \
--grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv
20 changes: 14 additions & 6 deletions src/metrics/regression_2/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def cross_validate_gene(
if estimator_t == 'ridge':
model = Ridge(random_state=random_state)
elif estimator_t == 'GB':
model = lightgbm.LGBMRegressor(verbosity=-1, n_estimators=100, n_jobs=4, random_state=random_state)
model = lightgbm.LGBMRegressor(verbosity=-1, n_estimators=100, n_jobs=par["max_workers"], random_state=random_state)
else:
raise NotImplementedError(f'Unknown model "{estimator_t}"')

Expand Down Expand Up @@ -209,17 +209,24 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:

# Load perturbation data
perturbation_data = ad.read_h5ad(par['perturbation_data'])
subsample = par['subsample']
if subsample != -1:
perturbation_data = perturbation_data[np.random.choice(perturbation_data.n_obs, subsample, replace=False), :]
gene_names = perturbation_data.var.index.to_numpy()
n_genes = len(gene_names)
groups = LabelEncoder().fit_transform(perturbation_data.obs.plate_name)

# Load inferred GRN
print(f'Loading GRN', flush=True)
grn = load_grn(par['prediction'], gene_names)



# Load and standardize perturbation data
layer = par['layer']
X = perturbation_data.layers[layer]
print(X.shape)

X = RobustScaler().fit_transform(X)

# Load consensus numbers of putative regulators
Expand All @@ -232,26 +239,27 @@ def main(par: Dict[str, Any]) -> pd.DataFrame:
# Evaluate GRN
print(f'Compute metrics for layer: {layer}', flush=True)
print(f'Dynamic approach:', flush=True)
score_dynamic = dynamic_approach(grn, X, groups, gene_names, par['reg_type'])
# score_dynamic = dynamic_approach(grn, X, groups, gene_names, par['reg_type'])
print(f'Static approach (theta=0):', flush=True)
score_static_min = static_approach(grn, n_features_theta_min, X, groups, gene_names, par['reg_type'])
print(f'Static approach (theta=0.5):', flush=True)
score_static_median = static_approach(grn, n_features_theta_median, X, groups, gene_names, par['reg_type'])
print(f'Static approach (theta=1):', flush=True)
score_static_max = static_approach(grn, n_features_theta_max, X, groups, gene_names, par['reg_type'])
score_overall = score_dynamic + score_static_min + score_static_median + score_static_max
# score_overall = score_dynamic + score_static_min + score_static_median + score_static_max
# TODO: find a mathematically sound way to combine Z-scores and r2 scores

print(f'Score on {layer}: {score_overall}')
# print(f'Score on {layer}: {score_overall}')
results = {
'static-theta-0.0': [score_static_min],
'static-theta-0.5': [score_static_median],
'static-theta-1.0': [score_static_max],
'dynamic': [score_dynamic],
'Overall': [score_overall],
# 'dynamic': [score_dynamic],
# 'Overall': [score_overall],
}

# Convert results to DataFrame
df_results = pd.DataFrame(results)
df_results['Mean'] = df_results.mean(axis=1)

return df_results
22 changes: 18 additions & 4 deletions src/metrics/regression_2/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
par = {
'perturbation_data': 'resources/grn-benchmark/perturbation_data.h5ad',
'layer': 'scgen_pearson',
'prediction': 'resources/grn-benchmark/negative_control.csv',
'prediction': 'resources/grn_models/collectri.csv',
'consensus': 'resources/grn-benchmark/consensus-num-regulators.json',
'score': 'output/score_regression2.csv',
'reg_type': 'ridge'
'reg_type': 'ridge',

}
## VIASH END

Expand All @@ -27,6 +28,19 @@
output = main(par)

print('Write output to file', flush=True)
print(output)
output.to_csv(par['score'])
metric_ids = output.columns.to_numpy()
metric_values = output.values[0]

print(metric_ids.shape, metric_values.shape)
output = ad.AnnData(
X=np.empty((0, 0)),
uns={
"dataset_id": par["layer"],
"method_id": f"reg2-{par['method_id']}",
"metric_ids": metric_ids,
"metric_values": metric_values
}
)

output.write_h5ad(par["score"], compression="gzip")
print('Completed', flush=True)
1 change: 1 addition & 0 deletions src/workflows/run_grn_evaluation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ workflow run_wf {
// construct list of metrics
// Metric modules run by this workflow. Groovy list literals require a
// comma between elements; a bare newline is not a separator.
metrics = [
  regression_1,
  regression_2
]

/***************************
Expand Down

0 comments on commit 0e2b73a

Please sign in to comment.